Import all the Libraries¶

In [1]:
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib.pyplot import show
In [2]:
df=pd.read_csv('Downloads/training_set.csv')
In [3]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallCond    1460 non-null   int64  
 19  YearBuilt      1460 non-null   int64  
 20  YearRemodAdd   1460 non-null   int64  
 21  RoofStyle      1460 non-null   object 
 22  RoofMatl       1460 non-null   object 
 23  Exterior1st    1460 non-null   object 
 24  Exterior2nd    1460 non-null   object 
 25  MasVnrType     1452 non-null   object 
 26  MasVnrArea     1452 non-null   float64
 27  ExterQual      1460 non-null   object 
 28  ExterCond      1460 non-null   object 
 29  Foundation     1460 non-null   object 
 30  BsmtQual       1423 non-null   object 
 31  BsmtCond       1423 non-null   object 
 32  BsmtExposure   1422 non-null   object 
 33  BsmtFinType1   1423 non-null   object 
 34  BsmtFinSF1     1460 non-null   int64  
 35  BsmtFinType2   1422 non-null   object 
 36  BsmtFinSF2     1460 non-null   int64  
 37  BsmtUnfSF      1460 non-null   int64  
 38  TotalBsmtSF    1460 non-null   int64  
 39  Heating        1460 non-null   object 
 40  HeatingQC      1460 non-null   object 
 41  CentralAir     1460 non-null   object 
 42  Electrical     1459 non-null   object 
 43  1stFlrSF       1460 non-null   int64  
 44  2ndFlrSF       1460 non-null   int64  
 45  LowQualFinSF   1460 non-null   int64  
 46  GrLivArea      1460 non-null   int64  
 47  BsmtFullBath   1460 non-null   int64  
 48  BsmtHalfBath   1460 non-null   int64  
 49  FullBath       1460 non-null   int64  
 50  HalfBath       1460 non-null   int64  
 51  BedroomAbvGr   1460 non-null   int64  
 52  KitchenAbvGr   1460 non-null   int64  
 53  KitchenQual    1460 non-null   object 
 54  TotRmsAbvGrd   1460 non-null   int64  
 55  Functional     1460 non-null   object 
 56  Fireplaces     1460 non-null   int64  
 57  FireplaceQu    770 non-null    object 
 58  GarageType     1379 non-null   object 
 59  GarageYrBlt    1379 non-null   float64
 60  GarageFinish   1379 non-null   object 
 61  GarageCars     1460 non-null   int64  
 62  GarageArea     1460 non-null   int64  
 63  GarageQual     1379 non-null   object 
 64  GarageCond     1379 non-null   object 
 65  PavedDrive     1460 non-null   object 
 66  WoodDeckSF     1460 non-null   int64  
 67  OpenPorchSF    1460 non-null   int64  
 68  EnclosedPorch  1460 non-null   int64  
 69  3SsnPorch      1460 non-null   int64  
 70  ScreenPorch    1460 non-null   int64  
 71  PoolArea       1460 non-null   int64  
 72  PoolQC         7 non-null      object 
 73  Fence          281 non-null    object 
 74  MiscFeature    54 non-null     object 
 75  MiscVal        1460 non-null   int64  
 76  MoSold         1460 non-null   int64  
 77  YrSold         1460 non-null   int64  
 78  SaleType       1460 non-null   object 
 79  SaleCondition  1460 non-null   object 
 80  SalePrice      1460 non-null   int64  
dtypes: float64(3), int64(35), object(43)
memory usage: 924.0+ KB
In [4]:
df['SaleCondition'].value_counts()
Out[4]:
Normal     1198
Partial     125
Abnorml     101
Family       20
Alloca       12
AdjLand       4
Name: SaleCondition, dtype: int64
In [5]:
df.shape
Out[5]:
(1460, 81)

Check whether there are any Null values¶

In [6]:
df.isna().sum()
Out[6]:
Id                 0
MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 81, dtype: int64

Filling Null values¶

In [7]:
for i in df.columns:
    if (df[i].isna().sum())>0:
        if df[i].dtypes=='object':
            x=df[i].mode()[0]
            df[i]=df[i].fillna(x)
        else:
            x=df[i].mean()
            df[i]=df[i].fillna(x)
In [8]:
df.isna().sum()
Out[8]:
Id               0
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
                ..
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
SalePrice        0
Length: 81, dtype: int64

Drop irrelevant columns [Seperating Input Output features (X & Y)]¶

In [9]:
X=df.drop(labels=['SalePrice','Id','LowQualFinSF','MiscVal'],axis=1)
Y=df['SalePrice']
In [10]:
X.shape
Out[10]:
(1460, 77)
In [11]:
Y.shape
Out[11]:
(1460,)

Seperating Categorical & Continuous columns¶

In [12]:
cat=[]
con=[]
for i in X.columns:
    if X[i].dtypes=='object':
        cat.append(i)
    else:
        con.append(i)
print(cat)
print(con)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
In [13]:
cat=['MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'Fence', 'SaleType', 'SaleCondition']
con=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'MoSold', 'YrSold']

Analysis¶

UNIVARIATE ANALYSIS (Distplot)¶

In [14]:
for i in df.columns:
    if df[i].dtypes=='object':
        df[i].value_counts().plot(kind='bar')
        plt.show()
    else:
        sb.histplot(data=df,x=df[i],kde=True)
        plt.show()

BIVARIATE ANALYSIS (Scatter Plot)¶

In [15]:
for i in df.columns:
    if df[i].dtypes=='object':
        sb.boxplot(data=df,x=df[i],y='SalePrice')
        plt.show()
    else:
        plt.scatter(data=df,x=df[i],y='SalePrice')
        plt.xlabel(i)
        plt.ylabel('SalePrice')
        plt.title(f'{i} vs SalePrice')
        plt.show()

MULTIVARIATE ANALYSIS (Heatmap)¶

In [16]:
a=df.corr()
C:\Users\PRATHMESH\AppData\Local\Temp\ipykernel_288228\962033958.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  a=df.corr()
In [17]:
a
Out[17]:
Id MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 ... WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MiscVal MoSold YrSold SalePrice
Id 1.000000 0.011156 -0.009601 -0.033226 -0.028365 0.012609 -0.012713 -0.021998 -0.050199 -0.005024 ... -0.029643 -0.000477 0.002889 -0.046635 0.001330 0.057044 -0.006242 0.021172 0.000712 -0.021917
MSSubClass 0.011156 1.000000 -0.357056 -0.139781 0.032628 -0.059316 0.027850 0.040581 0.022895 -0.069836 ... -0.012579 -0.006100 -0.012037 -0.043825 -0.026030 0.008283 -0.007683 -0.013585 -0.021407 -0.084284
LotFrontage -0.009601 -0.357056 1.000000 0.306795 0.234196 -0.052820 0.117598 0.082746 0.179283 0.215828 ... 0.077106 0.137454 0.009790 0.062335 0.037684 0.180868 0.001168 0.010158 0.006768 0.334901
LotArea -0.033226 -0.139781 0.306795 1.000000 0.105806 -0.005636 0.014228 0.013788 0.103960 0.214103 ... 0.171698 0.084774 -0.018340 0.020423 0.043160 0.077672 0.038068 0.001205 -0.014261 0.263843
OverallQual -0.028365 0.032628 0.234196 0.105806 1.000000 -0.091932 0.572323 0.550684 0.410238 0.239666 ... 0.238923 0.308819 -0.113937 0.030371 0.064886 0.065166 -0.031406 0.070815 -0.027347 0.790982
OverallCond 0.012609 -0.059316 -0.052820 -0.005636 -0.091932 1.000000 -0.375983 0.073741 -0.127788 -0.046231 ... -0.003334 -0.032589 0.070356 0.025504 0.054811 -0.001985 0.068777 -0.003511 0.043950 -0.077856
YearBuilt -0.012713 0.027850 0.117598 0.014228 0.572323 -0.375983 1.000000 0.592855 0.314745 0.249503 ... 0.224880 0.188686 -0.387268 0.031355 -0.050364 0.004950 -0.034383 0.012398 -0.013618 0.522897
YearRemodAdd -0.021998 0.040581 0.082746 0.013788 0.550684 0.073741 0.592855 1.000000 0.179186 0.128451 ... 0.205726 0.226298 -0.193919 0.045286 -0.038740 0.005829 -0.010286 0.021490 0.035743 0.507101
MasVnrArea -0.050199 0.022895 0.179283 0.103960 0.410238 -0.127788 0.314745 0.179186 1.000000 0.263582 ... 0.159349 0.124965 -0.109849 0.018795 0.061453 0.011723 -0.029815 -0.005940 -0.008184 0.475241
BsmtFinSF1 -0.005024 -0.069836 0.215828 0.214103 0.239666 -0.046231 0.249503 0.128451 0.263582 1.000000 ... 0.204306 0.111761 -0.102303 0.026451 0.062021 0.140491 0.003571 -0.015727 0.014359 0.386420
BsmtFinSF2 -0.005968 -0.065649 0.043340 0.111170 -0.059119 0.040229 -0.049107 -0.067759 -0.072302 -0.050117 ... 0.067898 0.003093 0.036543 -0.029993 0.088871 0.041709 0.004940 -0.015211 0.031706 -0.011378
BsmtUnfSF -0.007940 -0.140759 0.122156 -0.002618 0.308159 -0.136841 0.149040 0.181133 0.114184 -0.495251 ... -0.005316 0.129005 -0.002538 0.020764 -0.012579 -0.035092 -0.023837 0.034888 -0.041258 0.214479
TotalBsmtSF -0.015415 -0.238518 0.363358 0.260833 0.537808 -0.171098 0.391452 0.291066 0.362452 0.522396 ... 0.232019 0.247264 -0.095478 0.037384 0.084489 0.126053 -0.018479 0.013196 -0.014969 0.613581
1stFlrSF 0.010496 -0.251758 0.414266 0.299475 0.476224 -0.144203 0.281986 0.240379 0.342160 0.445863 ... 0.235459 0.211671 -0.065292 0.056104 0.088758 0.131525 -0.021096 0.031372 -0.013604 0.605852
2ndFlrSF 0.005590 0.307886 0.072483 0.050986 0.295493 0.028942 0.010308 0.140024 0.174019 -0.137079 ... 0.092165 0.208026 0.061989 -0.024358 0.040606 0.081487 0.016197 0.035164 -0.028700 0.319334
LowQualFinSF -0.044230 0.046474 0.036849 0.004779 -0.030429 0.025494 -0.183784 -0.062419 -0.069068 -0.064503 ... -0.025444 0.018251 0.061081 -0.004296 0.026799 0.062157 -0.003793 -0.022174 -0.028921 -0.025606
GrLivArea 0.008273 0.074853 0.368392 0.263116 0.593007 -0.079686 0.199010 0.287389 0.389893 0.208171 ... 0.247433 0.330224 0.009113 0.020643 0.101510 0.170205 -0.002416 0.050240 -0.036526 0.708624
BsmtFullBath 0.002289 0.003491 0.091481 0.158155 0.111098 -0.054942 0.187599 0.119470 0.085055 0.649212 ... 0.175315 0.067341 -0.049911 -0.000106 0.023148 0.067616 -0.023047 -0.025361 0.067049 0.227122
BsmtHalfBath -0.020155 -0.002333 -0.006419 0.048046 -0.040150 0.117821 -0.038162 -0.012337 0.026669 0.067418 ... 0.040161 -0.025324 -0.008555 0.035114 0.032121 0.020025 -0.007367 0.032873 -0.046524 -0.016844
FullBath 0.005587 0.131608 0.180424 0.126031 0.550600 -0.194149 0.468271 0.439046 0.275730 0.058543 ... 0.187703 0.259977 -0.115093 0.035353 -0.008106 0.049604 -0.014290 0.055872 -0.019669 0.560664
HalfBath 0.006784 0.177354 0.048258 0.014259 0.273458 -0.060769 0.242656 0.183331 0.200802 0.004262 ... 0.108080 0.199740 -0.095317 -0.004972 0.072426 0.022381 0.001290 -0.009050 -0.010269 0.284108
BedroomAbvGr 0.037719 -0.023438 0.237023 0.119690 0.101676 0.012980 -0.070651 -0.040581 0.102417 -0.107355 ... 0.046854 0.093810 0.041570 -0.024478 0.044300 0.070703 0.007767 0.046544 -0.036014 0.168213
KitchenAbvGr 0.002951 0.281721 -0.005805 -0.017784 -0.183882 -0.087001 -0.174800 -0.149598 -0.037364 -0.081007 ... -0.090130 -0.070091 0.037312 -0.024600 -0.051613 -0.014525 0.062341 0.026589 0.031687 -0.135907
TotRmsAbvGrd 0.027239 0.040380 0.320146 0.190015 0.427452 -0.057583 0.095589 0.191740 0.280027 0.044316 ... 0.165984 0.234192 0.004151 -0.006683 0.059383 0.083757 0.024763 0.036907 -0.034516 0.533723
Fireplaces -0.019772 -0.045569 0.235755 0.271364 0.396765 -0.023820 0.147716 0.112581 0.247906 0.260011 ... 0.200019 0.169405 -0.024822 0.011257 0.184530 0.095074 0.001409 0.046357 -0.024096 0.466929
GarageYrBlt 0.000070 0.080187 0.064324 -0.024812 0.518018 -0.306169 0.780555 0.618130 0.249367 0.150338 ... 0.220623 0.218490 -0.285882 0.023534 -0.075256 -0.014499 -0.031853 0.005173 -0.000987 0.470177
GarageCars 0.016570 -0.040110 0.269729 0.154871 0.600671 -0.185758 0.537850 0.420622 0.363778 0.224054 ... 0.226342 0.213569 -0.151434 0.035765 0.050494 0.020934 -0.043080 0.040522 -0.039117 0.640409
GarageArea 0.017634 -0.098672 0.323663 0.180403 0.562022 -0.151521 0.478954 0.371600 0.372567 0.296970 ... 0.224666 0.241435 -0.121777 0.035087 0.051412 0.061047 -0.027400 0.027974 -0.027378 0.623431
WoodDeckSF -0.029643 -0.012579 0.077106 0.171698 0.238923 -0.003334 0.224880 0.205726 0.159349 0.204306 ... 1.000000 0.058661 -0.125989 -0.032771 -0.074181 0.073378 -0.009551 0.021011 0.022270 0.324413
OpenPorchSF -0.000477 -0.006100 0.137454 0.084774 0.308819 -0.032589 0.188686 0.226298 0.124965 0.111761 ... 0.058661 1.000000 -0.093079 -0.005842 0.074304 0.060762 -0.018584 0.071255 -0.057619 0.315856
EnclosedPorch 0.002889 -0.012037 0.009790 -0.018340 -0.113937 0.070356 -0.387268 -0.193919 -0.109849 -0.102303 ... -0.125989 -0.093079 1.000000 -0.037305 -0.082864 0.054203 0.018361 -0.028887 -0.009916 -0.128578
3SsnPorch -0.046635 -0.043825 0.062335 0.020423 0.030371 0.025504 0.031355 0.045286 0.018795 0.026451 ... -0.032771 -0.005842 -0.037305 1.000000 -0.031436 -0.007992 0.000354 0.029474 0.018645 0.044584
ScreenPorch 0.001330 -0.026030 0.037684 0.043160 0.064886 0.054811 -0.050364 -0.038740 0.061453 0.062021 ... -0.074181 0.074304 -0.082864 -0.031436 1.000000 0.051307 0.031946 0.023217 0.010694 0.111447
PoolArea 0.057044 0.008283 0.180868 0.077672 0.065166 -0.001985 0.004950 0.005829 0.011723 0.140491 ... 0.073378 0.060762 0.054203 -0.007992 0.051307 1.000000 0.029669 -0.033737 -0.059689 0.092404
MiscVal -0.006242 -0.007683 0.001168 0.038068 -0.031406 0.068777 -0.034383 -0.010286 -0.029815 0.003571 ... -0.009551 -0.018584 0.018361 0.000354 0.031946 0.029669 1.000000 -0.006495 0.004906 -0.021190
MoSold 0.021172 -0.013585 0.010158 0.001205 0.070815 -0.003511 0.012398 0.021490 -0.005940 -0.015727 ... 0.021011 0.071255 -0.028887 0.029474 0.023217 -0.033737 -0.006495 1.000000 -0.145721 0.046432
YrSold 0.000712 -0.021407 0.006768 -0.014261 -0.027347 0.043950 -0.013618 0.035743 -0.008184 0.014359 ... 0.022270 -0.057619 -0.009916 0.018645 0.010694 -0.059689 0.004906 -0.145721 1.000000 -0.028923
SalePrice -0.021917 -0.084284 0.334901 0.263843 0.790982 -0.077856 0.522897 0.507101 0.475241 0.386420 ... 0.324413 0.315856 -0.128578 0.044584 0.111447 0.092404 -0.021190 0.046432 -0.028923 1.000000

38 rows × 38 columns

In [18]:
sb.heatmap(a,annot=True)
Out[18]:
<Axes: >

Standardisation of Continuous¶

In [19]:
from sklearn.preprocessing import StandardScaler
In [20]:
ss=StandardScaler()
In [21]:
X1=pd.DataFrame(ss.fit_transform(X[con]),columns=con)
In [22]:
X1
Out[22]:
MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 ... GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch MoSold YrSold
0 0.073375 -0.229372 -0.207142 0.651479 -0.517200 1.050994 0.878668 0.511418 0.575425 -0.288653 ... 1.021157 0.311725 0.351000 -0.752176 0.216503 -0.359325 -0.116339 -0.270208 -1.599111 0.138777
1 -0.872563 0.451936 -0.091886 -0.071836 2.179628 0.156734 -0.429577 -0.574410 1.171992 -0.288653 ... -0.104483 0.311725 -0.060731 1.626195 -0.704483 -0.359325 -0.116339 -0.270208 -0.489110 -0.614439
2 0.073375 -0.093110 0.073480 0.651479 -0.517200 0.984752 0.830215 0.323060 0.092907 -0.288653 ... 0.937776 0.311725 0.631726 -0.752176 -0.070361 -0.359325 -0.116339 -0.270208 0.990891 0.138777
3 0.309859 -0.456474 -0.096897 0.651479 -0.517200 -1.863632 -0.720298 -0.574410 -0.499274 -0.288653 ... 0.812705 1.650307 0.790804 -0.752176 -0.176048 4.092524 -0.116339 -0.270208 -1.599111 -1.367655
4 0.073375 0.633618 0.375148 1.374795 -0.517200 0.951632 0.733308 1.364570 0.463568 -0.288653 ... 0.896086 1.650307 1.698485 0.780197 0.563760 -0.359325 -0.116339 -0.270208 2.100892 0.138777
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1455 0.073375 -0.365633 -0.260560 -0.071836 -0.517200 0.918511 0.733308 -0.574410 -0.973018 -0.288653 ... 0.854395 0.311725 -0.060731 -0.752176 -0.100558 -0.359325 -0.116339 -0.270208 0.620891 -0.614439
1456 -0.872563 0.679039 0.266407 -0.071836 0.381743 0.222975 0.151865 0.084843 0.759659 0.722112 ... -0.021102 0.311725 0.126420 2.033231 -0.704483 -0.359325 -0.116339 -0.270208 -1.599111 1.645210
1457 0.309859 -0.183951 -0.147810 0.651479 3.078570 -1.002492 1.024029 -0.574410 -0.369871 -0.288653 ... -1.563645 -1.026858 -1.033914 -0.752176 0.201405 -0.359325 -0.116339 -0.270208 -0.489110 1.645210
1458 -0.872563 -0.093110 -0.080160 -0.795151 0.381743 -0.704406 0.539493 -0.574410 -0.865548 6.092188 ... -1.188432 -1.026858 -1.090059 2.168910 -0.704483 1.473789 -0.116339 -0.270208 -0.859110 1.645210
1459 -0.872563 0.224833 -0.058112 -0.795151 0.381743 -0.207594 -0.962566 -0.574410 0.847389 1.509640 ... -0.563077 -1.026858 -0.921624 5.121921 0.322190 -0.359325 -0.116339 -0.270208 -0.119110 0.138777

1460 rows × 33 columns

Encoding ( converting Categorical cloumns in continous)¶

ONE HOT ENCODING¶

In [26]:
X2=pd.get_dummies(X[cat])
In [27]:
X2
Out[27]:
MSZoning_C (all) MSZoning_FV MSZoning_RH MSZoning_RL MSZoning_RM Street_Grvl Street_Pave LotShape_IR1 LotShape_IR2 LotShape_IR3 ... SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca SaleCondition_Family SaleCondition_Normal SaleCondition_Partial
0 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 1 0 0 0 0 1 0
1 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 1 0 0 0 0 1 0
2 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
3 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 1 0 0 0 0 0
4 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1455 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 1 0 0 0 0 1 0
1456 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 1 0 0 0 0 1 0
1457 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 1 0 0 0 0 1 0
1458 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 1 0 0 0 0 1 0
1459 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 1 0 0 0 0 1 0

1460 rows × 243 columns

In [28]:
X2.columns
Out[28]:
Index(['MSZoning_C (all)', 'MSZoning_FV', 'MSZoning_RH', 'MSZoning_RL',
       'MSZoning_RM', 'Street_Grvl', 'Street_Pave', 'LotShape_IR1',
       'LotShape_IR2', 'LotShape_IR3',
       ...
       'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
       'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
       'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
       'SaleCondition_Partial'],
      dtype='object', length=243)
In [29]:
df.SaleCondition.value_counts()
Out[29]:
Normal     1198
Partial     125
Abnorml     101
Family       20
Alloca       12
AdjLand       4
Name: SaleCondition, dtype: int64

DETECTING & TREATING OUTLIERS¶

In [35]:
out=[]
for i in con:
    out.extend((X1[(X1[i]>3) | (X1[i]<-3)]).index)
print(out)
[9, 48, 93, 125, 165, 246, 291, 300, 312, 335, 411, 488, 520, 535, 635, 637, 703, 705, 713, 861, 969, 985, 1030, 1062, 1144, 1186, 1190, 1266, 1393, 1416, 171, 197, 231, 277, 313, 446, 807, 909, 934, 1107, 1127, 1173, 1182, 1211, 1298, 1337, 53, 249, 313, 335, 384, 451, 457, 661, 706, 769, 848, 1298, 1396, 375, 533, 88, 185, 191, 218, 241, 250, 304, 375, 378, 398, 461, 508, 519, 583, 676, 703, 726, 745, 980, 991, 1123, 1149, 1213, 1268, 1327, 1352, 1435, 1457, 304, 630, 747, 1132, 1137, 1349, 37, 58, 70, 105, 115, 161, 169, 178, 224, 297, 349, 403, 477, 517, 523, 654, 691, 718, 755, 763, 798, 808, 825, 898, 981, 1111, 1169, 1228, 1289, 1298, 1373, 1417, 70, 178, 523, 898, 1182, 1298, 24, 52, 113, 116, 153, 166, 233, 253, 260, 263, 271, 273, 313, 322, 355, 414, 440, 446, 470, 493, 542, 548, 577, 586, 599, 666, 697, 764, 785, 828, 842, 854, 888, 918, 923, 924, 1040, 1059, 1077, 1152, 1220, 1253, 1299, 1308, 1320, 1369, 1387, 1418, 1445, 1458, 137, 224, 278, 477, 496, 581, 678, 774, 798, 932, 1267, 224, 332, 440, 496, 523, 691, 1044, 1182, 1298, 1373, 224, 440, 496, 523, 529, 691, 898, 1024, 1044, 1182, 1298, 1373, 304, 691, 1169, 1182, 118, 185, 197, 304, 496, 523, 608, 635, 691, 769, 798, 1169, 1182, 1268, 1298, 1353, 53, 188, 313, 326, 335, 420, 454, 588, 634, 738, 807, 921, 942, 1163, 1270, 1298, 1, 26, 33, 37, 41, 50, 93, 116, 129, 176, 197, 201, 213, 215, 218, 245, 249, 251, 253, 298, 299, 314, 330, 352, 358, 367, 414, 421, 426, 499, 504, 558, 574, 576, 580, 597, 611, 628, 633, 658, 691, 697, 717, 741, 743, 745, 814, 828, 892, 920, 925, 931, 944, 952, 953, 954, 1006, 1029, 1041, 1047, 1052, 1055, 1069, 1072, 1076, 1080, 1103, 1118, 1123, 1149, 1156, 1181, 1213, 1225, 1276, 1287, 1327, 1335, 1350, 1389, 1405, 1415, 188, 298, 597, 624, 628, 921, 1154, 1163, 1230, 1283, 1350, 1450, 53, 144, 189, 291, 330, 570, 634, 635, 843, 897, 1163, 1213, 1270, 1350, 8, 9, 17, 39, 48, 74, 78, 93, 102, 137, 144, 165, 188, 246, 330, 342, 420, 441, 454, 488, 505, 520, 529, 570, 634, 635, 637, 676, 703, 705, 728, 736, 778, 809, 843, 886, 894, 897, 910, 913, 921, 940, 942, 943, 954, 955, 984, 1003, 1011, 1030, 1062, 1090, 1163, 1186, 1216, 1230, 1232, 1266, 1275, 1283, 1292, 1336, 1350, 1391, 1393, 1412, 1416, 1450, 185, 635, 769, 803, 897, 910, 1031, 1173, 1230, 1298, 1350, 1386, 166, 309, 605, 642, 1298, 93, 653, 178, 581, 664, 825, 1061, 1190, 1298, 53, 64, 166, 169, 335, 343, 357, 480, 661, 769, 828, 848, 893, 961, 974, 1044, 1068, 1210, 1312, 1313, 1423, 1459, 28, 185, 293, 495, 499, 523, 583, 591, 645, 664, 666, 713, 735, 775, 784, 807, 854, 875, 947, 961, 996, 1184, 1193, 1292, 1298, 1328, 1369, 3, 7, 154, 197, 260, 306, 314, 325, 328, 358, 365, 380, 459, 462, 496, 520, 577, 630, 648, 653, 660, 662, 718, 720, 747, 799, 813, 836, 840, 918, 939, 945, 1013, 1030, 1081, 1119, 1139, 1150, 1152, 1185, 1197, 1202, 1248, 1266, 1326, 1360, 1382, 1393, 1419, 1439, 1445, 5, 55, 120, 129, 159, 182, 187, 205, 237, 258, 280, 546, 704, 726, 744, 889, 941, 1080, 1156, 1161, 1181, 1346, 1437, 46, 72, 80, 104, 176, 185, 189, 196, 289, 297, 312, 339, 351, 359, 360, 366, 400, 426, 471, 475, 550, 605, 618, 625, 647, 673, 764, 769, 785, 795, 803, 828, 830, 854, 859, 887, 888, 907, 919, 944, 1037, 1055, 1067, 1070, 1154, 1171, 1184, 1228, 1282, 1293, 1301, 1320, 1328, 1386, 1414]
In [36]:
import numpy as np
In [37]:
outliers=np.unique(out)
In [38]:
outliers
Out[38]:
array([   1,    3,    5,    7,    8,    9,   17,   24,   26,   28,   33,
         37,   39,   41,   46,   48,   50,   52,   53,   55,   58,   64,
         70,   72,   74,   78,   80,   88,   93,  102,  104,  105,  113,
        115,  116,  118,  120,  125,  129,  137,  144,  153,  154,  159,
        161,  165,  166,  169,  171,  176,  178,  182,  185,  187,  188,
        189,  191,  196,  197,  201,  205,  213,  215,  218,  224,  231,
        233,  237,  241,  245,  246,  249,  250,  251,  253,  258,  260,
        263,  271,  273,  277,  278,  280,  289,  291,  293,  297,  298,
        299,  300,  304,  306,  309,  312,  313,  314,  322,  325,  326,
        328,  330,  332,  335,  339,  342,  343,  349,  351,  352,  355,
        357,  358,  359,  360,  365,  366,  367,  375,  378,  380,  384,
        398,  400,  403,  411,  414,  420,  421,  426,  440,  441,  446,
        451,  454,  457,  459,  461,  462,  470,  471,  475,  477,  480,
        488,  493,  495,  496,  499,  504,  505,  508,  517,  519,  520,
        523,  529,  533,  535,  542,  546,  548,  550,  558,  570,  574,
        576,  577,  580,  581,  583,  586,  588,  591,  597,  599,  605,
        608,  611,  618,  624,  625,  628,  630,  633,  634,  635,  637,
        642,  645,  647,  648,  653,  654,  658,  660,  661,  662,  664,
        666,  673,  676,  678,  691,  697,  703,  704,  705,  706,  713,
        717,  718,  720,  726,  728,  735,  736,  738,  741,  743,  744,
        745,  747,  755,  763,  764,  769,  774,  775,  778,  784,  785,
        795,  798,  799,  803,  807,  808,  809,  813,  814,  825,  828,
        830,  836,  840,  842,  843,  848,  854,  859,  861,  875,  886,
        887,  888,  889,  892,  893,  894,  897,  898,  907,  909,  910,
        913,  918,  919,  920,  921,  923,  924,  925,  931,  932,  934,
        939,  940,  941,  942,  943,  944,  945,  947,  952,  953,  954,
        955,  961,  969,  974,  980,  981,  984,  985,  991,  996, 1003,
       1006, 1011, 1013, 1024, 1029, 1030, 1031, 1037, 1040, 1041, 1044,
       1047, 1052, 1055, 1059, 1061, 1062, 1067, 1068, 1069, 1070, 1072,
       1076, 1077, 1080, 1081, 1090, 1103, 1107, 1111, 1118, 1119, 1123,
       1127, 1132, 1137, 1139, 1144, 1149, 1150, 1152, 1154, 1156, 1161,
       1163, 1169, 1171, 1173, 1181, 1182, 1184, 1185, 1186, 1190, 1193,
       1197, 1202, 1210, 1211, 1213, 1216, 1220, 1225, 1228, 1230, 1232,
       1248, 1253, 1266, 1267, 1268, 1270, 1275, 1276, 1282, 1283, 1287,
       1289, 1292, 1293, 1298, 1299, 1301, 1308, 1312, 1313, 1320, 1326,
       1327, 1328, 1335, 1336, 1337, 1346, 1349, 1350, 1352, 1353, 1360,
       1369, 1373, 1382, 1386, 1387, 1389, 1391, 1393, 1396, 1405, 1412,
       1414, 1415, 1416, 1417, 1418, 1419, 1423, 1435, 1437, 1439, 1445,
       1450, 1457, 1458, 1459])

Droping Outliers from X & Y¶

In [39]:
X1.drop(index=outliers,axis=0,inplace=True)
X2.drop(index=outliers,axis=0,inplace=True)
Y.drop(index=outliers,axis=0,inplace=True)
In [40]:
X1.shape
Out[40]:
(1038, 33)
In [41]:
X2.shape
Out[41]:
(1038, 243)
In [42]:
Y.shape
Out[42]:
(1038,)

Join Cat & Con¶

In [43]:
Xnew=X1.join(X2)
In [44]:
Xnew.shape
Out[44]:
(1038, 276)

Reindexing X & Y¶

In [45]:
Xnew.index=range(0,1038)
Y.index=range(0,1038)
In [46]:
Xnew.head()
Out[46]:
MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 ... SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca SaleCondition_Family SaleCondition_Normal SaleCondition_Partial
0 0.073375 -0.229372 -0.207142 0.651479 -0.5172 1.050994 0.878668 0.511418 0.575425 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
1 0.073375 -0.093110 0.073480 0.651479 -0.5172 0.984752 0.830215 0.323060 0.092907 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
2 0.073375 0.633618 0.375148 1.374795 -0.5172 0.951632 0.733308 1.364570 0.463568 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
3 -0.872563 0.224833 -0.043379 1.374795 -0.5172 1.084115 0.975575 0.456019 2.029558 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
4 -0.872563 -0.002269 0.068469 -0.795151 -0.5172 -0.207594 -0.962566 -0.574410 1.014077 -0.288653 ... 0 0 0 1 0 0 0 0 1 0

5 rows × 276 columns

In [47]:
Xnew.columns
Out[47]:
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
       ...
       'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
       'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
       'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
       'SaleCondition_Partial'],
      dtype='object', length=276)
In [48]:
from sklearn.model_selection import train_test_split
In [49]:
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.3,random_state=54)
In [50]:
xtrain.shape
Out[50]:
(726, 276)
In [51]:
xtrain.columns
Out[51]:
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
       ...
       'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
       'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
       'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
       'SaleCondition_Partial'],
      dtype='object', length=276)
In [52]:
ytrain.shape
Out[52]:
(726,)
In [53]:
ytrain.info()
<class 'pandas.core.series.Series'>
Int64Index: 726 entries, 24 to 898
Series name: SalePrice
Non-Null Count  Dtype
--------------  -----
726 non-null    int64
dtypes: int64(1)
memory usage: 11.3 KB
In [54]:
from statsmodels.api import OLS,add_constant
In [55]:
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
In [56]:
model.summary()
Out[56]:
OLS Regression Results
Dep. Variable: SalePrice R-squared: 0.953
Model: OLS Adj. R-squared: 0.935
Method: Least Squares F-statistic: 52.82
Date: Sat, 26 Aug 2023 Prob (F-statistic): 1.05e-259
Time: 11:22:09 Log-Likelihood: -7964.7
No. Observations: 726 AIC: 1.633e+04
Df Residuals: 525 BIC: 1.725e+04
Df Model: 200
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
MSSubClass -1.315e+04 6547.419 -2.009 0.045 -2.6e+04 -290.031
LotFrontage -260.4685 1314.618 -0.198 0.843 -2843.026 2322.089
LotArea 1.287e+04 3440.242 3.741 0.000 6110.629 1.96e+04
OverallQual 8283.9104 1678.468 4.935 0.000 4986.571 1.16e+04
OverallCond 5896.5633 1172.848 5.028 0.000 3592.512 8200.614
YearBuilt 9837.1789 3004.526 3.274 0.001 3934.810 1.57e+04
YearRemodAdd 1206.9199 1408.688 0.857 0.392 -1560.438 3974.277
MasVnrArea 1473.3009 1670.927 0.882 0.378 -1809.224 4755.825
BsmtFinSF1 6388.6517 1281.327 4.986 0.000 3871.493 8905.810
BsmtFinSF2 5896.6564 3122.203 1.889 0.059 -236.889 1.2e+04
BsmtUnfSF -33.5562 1076.981 -0.031 0.975 -2149.277 2082.164
TotalBsmtSF 8776.4351 1717.973 5.109 0.000 5401.489 1.22e+04
1stFlrSF 4766.9636 1.26e+04 0.378 0.705 -2e+04 2.95e+04
2ndFlrSF 1.122e+04 1.38e+04 0.816 0.415 -1.58e+04 3.82e+04
GrLivArea 1.709e+04 1.7e+04 1.007 0.314 -1.63e+04 5.04e+04
BsmtFullBath 397.9571 1174.296 0.339 0.735 -1908.940 2704.854
BsmtHalfBath -3884.5450 573.727 -6.771 0.000 -5011.627 -2757.463
FullBath 4312.3842 1546.375 2.789 0.005 1274.542 7350.226
HalfBath 1391.3756 1315.070 1.058 0.291 -1192.070 3974.821
BedroomAbvGr -913.1800 1426.761 -0.640 0.522 -3716.042 1889.682
KitchenAbvGr -3407.4397 503.261 -6.771 0.000 -4396.092 -2418.788
TotRmsAbvGrd -1036.6683 1827.880 -0.567 0.571 -4627.526 2554.189
Fireplaces 3305.4939 1186.072 2.787 0.006 975.464 5635.523
GarageYrBlt -262.5252 1552.134 -0.169 0.866 -3311.682 2786.631
GarageCars 2592.5982 1886.022 1.375 0.170 -1112.478 6297.675
GarageArea 3241.3391 1964.675 1.650 0.100 -618.251 7100.929
WoodDeckSF 4139.0441 939.410 4.406 0.000 2293.580 5984.508
OpenPorchSF 1364.6058 1117.353 1.221 0.223 -830.427 3559.638
EnclosedPorch 314.8829 1235.887 0.255 0.799 -2113.009 2742.775
3SsnPorch -1.9e+04 2.3e+04 -0.827 0.409 -6.41e+04 2.61e+04
ScreenPorch 357.1218 1436.578 0.249 0.804 -2465.025 3179.268
MoSold 104.2274 756.613 0.138 0.890 -1382.133 1590.587
YrSold 140.5732 775.208 0.181 0.856 -1382.318 1663.464
MSZoning_C (all) -1.973e+04 9540.011 -2.068 0.039 -3.85e+04 -986.224
MSZoning_FV 2.463e+04 6400.234 3.848 0.000 1.21e+04 3.72e+04
MSZoning_RH -4637.8667 7896.246 -0.587 0.557 -2.01e+04 1.09e+04
MSZoning_RL 4407.9207 3951.065 1.116 0.265 -3353.917 1.22e+04
MSZoning_RM 1.145e+04 4259.858 2.687 0.007 3078.333 1.98e+04
Street_Grvl 1.687e+04 9491.563 1.777 0.076 -1777.977 3.55e+04
Street_Pave -753.7680 8763.872 -0.086 0.931 -1.8e+04 1.65e+04
LotShape_IR1 5562.8497 3350.516 1.660 0.097 -1019.216 1.21e+04
LotShape_IR2 4869.2742 5034.150 0.967 0.334 -5020.277 1.48e+04
LotShape_IR3 -242.9744 8289.449 -0.029 0.977 -1.65e+04 1.6e+04
LotShape_Reg 5925.2134 3525.365 1.681 0.093 -1000.340 1.29e+04
LandContour_Bnk 7995.1203 4274.066 1.871 0.062 -401.252 1.64e+04
LandContour_HLS 1.781e+04 4086.966 4.358 0.000 9784.082 2.58e+04
LandContour_Low -1.705e+04 5523.692 -3.086 0.002 -2.79e+04 -6195.030
LandContour_Lvl 7352.6291 2642.968 2.782 0.006 2160.536 1.25e+04
Utilities_AllPub 1.611e+04 2380.005 6.771 0.000 1.14e+04 2.08e+04
Utilities_NoSeWa -1.609e-10 6e-11 -2.682 0.008 -2.79e-10 -4.3e-11
LotConfig_Corner 8365.4851 3655.809 2.288 0.023 1183.675 1.55e+04
LotConfig_CulDSac 1.133e+04 4135.856 2.739 0.006 3205.118 1.95e+04
LotConfig_FR2 -1040.4242 4513.464 -0.231 0.818 -9907.092 7826.243
LotConfig_FR3 -1.071e+04 1.27e+04 -0.845 0.398 -3.56e+04 1.42e+04
LotConfig_Inside 8166.3588 3455.492 2.363 0.018 1378.070 1.5e+04
LandSlope_Gtl 1.0049 6236.634 0.000 1.000 -1.23e+04 1.23e+04
LandSlope_Mod -2274.7517 6781.438 -0.335 0.737 -1.56e+04 1.1e+04
LandSlope_Sev 1.839e+04 1.26e+04 1.454 0.146 -6448.529 4.32e+04
Neighborhood_Blmngtn 2.253e+04 7115.645 3.166 0.002 8549.491 3.65e+04
Neighborhood_Blueste -6630.9441 1.79e+04 -0.370 0.712 -4.18e+04 2.86e+04
Neighborhood_BrDale 2551.2707 9232.445 0.276 0.782 -1.56e+04 2.07e+04
Neighborhood_BrkSide -2815.8276 5833.370 -0.483 0.630 -1.43e+04 8643.786
Neighborhood_ClearCr 3044.3355 7729.911 0.394 0.694 -1.21e+04 1.82e+04
Neighborhood_CollgCr -4170.0792 3149.905 -1.324 0.186 -1.04e+04 2017.887
Neighborhood_Crawfor 2.082e+04 6436.559 3.234 0.001 8172.909 3.35e+04
Neighborhood_Edwards -9957.5330 3757.448 -2.650 0.008 -1.73e+04 -2576.053
Neighborhood_Gilbert -6617.4477 4085.241 -1.620 0.106 -1.46e+04 1407.980
Neighborhood_IDOTRR -8494.0362 7401.705 -1.148 0.252 -2.3e+04 6046.561
Neighborhood_MeadowV -1.155e+04 8766.009 -1.318 0.188 -2.88e+04 5668.167
Neighborhood_Mitchel -8888.8204 4481.737 -1.983 0.048 -1.77e+04 -84.481
Neighborhood_NAmes -8133.5656 3157.561 -2.576 0.010 -1.43e+04 -1930.559
Neighborhood_NPkVill 1.612e+04 1.15e+04 1.401 0.162 -6483.589 3.87e+04
Neighborhood_NWAmes -1.505e+04 4147.915 -3.628 0.000 -2.32e+04 -6900.090
Neighborhood_NoRidge 1.219e+04 5690.954 2.141 0.033 1005.287 2.34e+04
Neighborhood_NridgHt 1.388e+04 4827.917 2.874 0.004 4392.860 2.34e+04
Neighborhood_OldTown -1.704e+04 5502.935 -3.097 0.002 -2.79e+04 -6234.508
Neighborhood_SWISU -1.057e+04 8678.849 -1.218 0.224 -2.76e+04 6480.937
Neighborhood_Sawyer -6521.8028 3885.752 -1.678 0.094 -1.42e+04 1111.730
Neighborhood_SawyerW -5028.4263 4413.136 -1.139 0.255 -1.37e+04 3641.148
Neighborhood_Somerst -7246.4231 6273.337 -1.155 0.249 -1.96e+04 5077.502
Neighborhood_StoneBr 6.023e+04 6716.782 8.967 0.000 4.7e+04 7.34e+04
Neighborhood_Timber -7879.7337 5016.435 -1.571 0.117 -1.77e+04 1975.016
Neighborhood_Veenker 1354.9000 1.43e+04 0.095 0.924 -2.67e+04 2.94e+04
Condition1_Artery -9465.7000 5654.475 -1.674 0.095 -2.06e+04 1642.475
Condition1_Feedr -23.5864 4896.137 -0.005 0.996 -9642.012 9594.839
Condition1_Norm 3728.5999 3618.612 1.030 0.303 -3380.138 1.08e+04
Condition1_PosA 1.414e+04 1.26e+04 1.121 0.263 -1.06e+04 3.89e+04
Condition1_PosN 9318.8611 9672.546 0.963 0.336 -9682.786 2.83e+04
Condition1_RRAe -1.35e+04 8214.265 -1.643 0.101 -2.96e+04 2640.356
Condition1_RRAn -85.4550 5575.391 -0.015 0.988 -1.1e+04 1.09e+04
Condition1_RRNe -2254.3990 1.59e+04 -0.142 0.887 -3.35e+04 2.9e+04
Condition1_RRNn 1.425e+04 1.46e+04 0.977 0.329 -1.44e+04 4.29e+04
Condition2_Artery 1.231e-12 2.61e-11 0.047 0.962 -5.01e-11 5.26e-11
Condition2_Feedr 5064.5926 8192.456 0.618 0.537 -1.1e+04 2.12e+04
Condition2_Norm 1.105e+04 7675.804 1.440 0.151 -4029.291 2.61e+04
Condition2_PosA 2.996e-11 3.19e-11 0.939 0.348 -3.27e-11 9.26e-11
Condition2_PosN 4.697e-11 2.92e-11 1.610 0.108 -1.03e-11 1.04e-10
Condition2_RRAe -6.856e-11 3.87e-11 -1.771 0.077 -1.45e-10 7.49e-12
Condition2_RRAn -8.321e-11 3.52e-11 -2.366 0.018 -1.52e-10 -1.41e-11
Condition2_RRNn -1.806e-11 2.52e-11 -0.716 0.474 -6.76e-11 3.15e-11
BldgType_1Fam 7115.4212 8702.688 0.818 0.414 -9980.947 2.42e+04
BldgType_2fmCon 3026.8797 2.22e+04 0.136 0.892 -4.06e+04 4.67e+04
BldgType_Duplex -1.296e+04 1.21e+04 -1.070 0.285 -3.67e+04 1.08e+04
BldgType_Twnhs 7282.8735 1.12e+04 0.649 0.516 -1.47e+04 2.93e+04
BldgType_TwnhsE 1.165e+04 1.07e+04 1.091 0.276 -9327.572 3.26e+04
HouseStyle_1.5Fin -1694.3085 4813.348 -0.352 0.725 -1.12e+04 7761.479
HouseStyle_1.5Unf 7281.7269 8259.900 0.882 0.378 -8944.788 2.35e+04
HouseStyle_1Story -1555.9546 7211.536 -0.216 0.829 -1.57e+04 1.26e+04
HouseStyle_2.5Fin -1.623e+04 1.77e+04 -0.915 0.361 -5.11e+04 1.86e+04
HouseStyle_2.5Unf 2709.1949 1.79e+04 0.151 0.880 -3.25e+04 3.79e+04
HouseStyle_2Story 1073.1293 4785.761 0.224 0.823 -8328.464 1.05e+04
HouseStyle_SFoyer 1.488e+04 7677.669 1.938 0.053 -207.154 3e+04
HouseStyle_SLvl 9658.0611 6397.015 1.510 0.132 -2908.828 2.22e+04
RoofStyle_Flat -4160.3395 8628.687 -0.482 0.630 -2.11e+04 1.28e+04
RoofStyle_Gable -1934.3417 5113.683 -0.378 0.705 -1.2e+04 8111.452
RoofStyle_Gambrel 7143.8783 9375.612 0.762 0.446 -1.13e+04 2.56e+04
RoofStyle_Hip 1134.1691 5272.631 0.215 0.830 -9223.877 1.15e+04
RoofStyle_Mansard 1.393e+04 1.09e+04 1.280 0.201 -7454.425 3.53e+04
RoofStyle_Shed 8.508e-12 1.93e-11 0.440 0.660 -2.95e-11 4.65e-11
RoofMatl_ClyTile -1.77e-12 2.31e-11 -0.077 0.939 -4.71e-11 4.36e-11
RoofMatl_CompShg 2.619e+04 1.14e+04 2.299 0.022 3806.595 4.86e+04
RoofMatl_Membran -2.037e-11 2.15e-11 -0.947 0.344 -6.26e-11 2.19e-11
RoofMatl_Metal 3.193e-11 2.53e-11 1.264 0.207 -1.77e-11 8.16e-11
RoofMatl_Roll 9.673e-12 2.42e-11 0.400 0.690 -3.79e-11 5.72e-11
RoofMatl_Tar&Grv -4160.3395 8628.687 -0.482 0.630 -2.11e+04 1.28e+04
RoofMatl_WdShake -5920.0443 1.59e+04 -0.372 0.710 -3.72e+04 2.54e+04
RoofMatl_WdShngl 2.498e-11 2.41e-11 1.037 0.300 -2.23e-11 7.23e-11
Exterior1st_AsbShng 2.129e+04 1.67e+04 1.273 0.204 -1.16e+04 5.41e+04
Exterior1st_AsphShn 2.522e-12 2.05e-11 0.123 0.902 -3.77e-11 4.27e-11
Exterior1st_BrkComm -1807.4964 2.49e+04 -0.073 0.942 -5.06e+04 4.7e+04
Exterior1st_BrkFace 1.658e+04 7458.438 2.224 0.027 1932.197 3.12e+04
Exterior1st_CBlock 1.7e+04 1.18e+04 1.437 0.151 -6241.513 4.02e+04
Exterior1st_CemntBd 4775.5565 2889.020 1.653 0.099 -899.902 1.05e+04
Exterior1st_HdBoard 1167.4066 6205.911 0.188 0.851 -1.1e+04 1.34e+04
Exterior1st_ImStucc 8.55e-12 1.35e-11 0.631 0.528 -1.81e-11 3.52e-11
Exterior1st_MetalSd 3080.1012 1.03e+04 0.300 0.764 -1.71e+04 2.32e+04
Exterior1st_Plywood 6258.6768 6368.168 0.983 0.326 -6251.544 1.88e+04
Exterior1st_Stone 1.25e-11 1.42e-11 0.882 0.378 -1.53e-11 4.03e-11
Exterior1st_Stucco -5.726e+04 2.2e+04 -2.605 0.009 -1e+05 -1.41e+04
Exterior1st_VinylSd -1437.8909 8293.849 -0.173 0.862 -1.77e+04 1.49e+04
Exterior1st_Wd Sdng -1609.8616 5957.645 -0.270 0.787 -1.33e+04 1.01e+04
Exterior1st_WdShing 8077.2753 7596.801 1.063 0.288 -6846.587 2.3e+04
Exterior2nd_AsbShng -5201.2122 1.56e+04 -0.333 0.739 -3.59e+04 2.55e+04
Exterior2nd_AsphShn -7.205e-13 1.72e-11 -0.042 0.967 -3.44e-11 3.3e-11
Exterior2nd_Brk Cmn -7196.0535 1.39e+04 -0.518 0.605 -3.45e+04 2.01e+04
Exterior2nd_BrkFace -2.015e+04 9836.586 -2.048 0.041 -3.95e+04 -823.243
Exterior2nd_CBlock 1.7e+04 1.18e+04 1.437 0.151 -6241.513 4.02e+04
Exterior2nd_CmentBd 4775.5565 2889.020 1.653 0.099 -899.902 1.05e+04
Exterior2nd_HdBoard -5843.1708 5825.026 -1.003 0.316 -1.73e+04 5600.052
Exterior2nd_ImStucc -2.62e+04 1.19e+04 -2.195 0.029 -4.96e+04 -2746.387
Exterior2nd_MetalSd 4151.3490 1.01e+04 0.411 0.682 -1.57e+04 2.4e+04
Exterior2nd_Other -1.141e-11 8.83e-12 -1.293 0.197 -2.88e-11 5.93e-12
Exterior2nd_Plywood -8516.1279 5095.901 -1.671 0.095 -1.85e+04 1494.733
Exterior2nd_Stone -1.007e-11 1.14e-11 -0.884 0.377 -3.24e-11 1.23e-11
Exterior2nd_Stucco 6.631e+04 2.13e+04 3.115 0.002 2.45e+04 1.08e+05
Exterior2nd_VinylSd 1599.1743 8152.917 0.196 0.845 -1.44e+04 1.76e+04
Exterior2nd_Wd Sdng 4225.9916 5722.357 0.739 0.461 -7015.539 1.55e+04
Exterior2nd_Wd Shng -8847.1711 6991.072 -1.265 0.206 -2.26e+04 4886.739
MasVnrType_BrkCmn -2036.8548 7386.709 -0.276 0.783 -1.65e+04 1.25e+04
MasVnrType_BrkFace 2150.7161 2787.011 0.772 0.441 -3324.346 7625.779
MasVnrType_None 1705.0586 3063.321 0.557 0.578 -4312.813 7722.930
MasVnrType_Stone 1.43e+04 3308.295 4.321 0.000 7796.321 2.08e+04
ExterQual_Ex 1.619e+04 6489.369 2.495 0.013 3443.391 2.89e+04
ExterQual_Fa -4122.5778 1.16e+04 -0.354 0.723 -2.7e+04 1.87e+04
ExterQual_Gd 1893.6648 4652.527 0.407 0.684 -7246.191 1.1e+04
ExterQual_TA 2151.5659 4654.361 0.462 0.644 -6991.894 1.13e+04
ExterCond_Ex 2.641e-12 8.23e-12 0.321 0.748 -1.35e-11 1.88e-11
ExterCond_Fa 4131.7192 5543.503 0.745 0.456 -6758.452 1.5e+04
ExterCond_Gd 4368.9766 3332.772 1.311 0.190 -2178.229 1.09e+04
ExterCond_Po 4.715e-12 1.1e-11 0.430 0.667 -1.68e-11 2.63e-11
ExterCond_TA 7613.6670 2911.494 2.615 0.009 1894.057 1.33e+04
Foundation_BrkTil -6597.1028 5249.409 -1.257 0.209 -1.69e+04 3715.323
Foundation_CBlock -390.9368 4583.020 -0.085 0.932 -9394.246 8612.372
Foundation_PConc 306.7008 4702.587 0.065 0.948 -8931.498 9544.899
Foundation_Slab 3338.3849 8075.445 0.413 0.679 -1.25e+04 1.92e+04
Foundation_Stone 1.014e+04 1.27e+04 0.800 0.424 -1.47e+04 3.5e+04
Foundation_Wood 9318.1816 1.73e+04 0.539 0.590 -2.47e+04 4.33e+04
BsmtQual_Ex 9907.7574 3547.356 2.793 0.005 2939.002 1.69e+04
BsmtQual_Fa 1.018e+04 4531.121 2.246 0.025 1277.498 1.91e+04
BsmtQual_Gd -3212.5523 2376.286 -1.352 0.177 -7880.748 1455.644
BsmtQual_TA -759.6953 2383.564 -0.319 0.750 -5442.191 3922.800
BsmtCond_Fa 383.9748 3881.280 0.099 0.921 -7240.772 8008.722
BsmtCond_Gd 5359.8455 3239.165 1.655 0.099 -1003.471 1.17e+04
BsmtCond_Po -1.442e-11 7.31e-12 -1.974 0.049 -2.88e-11 -7e-14
BsmtCond_TA 1.037e+04 2368.752 4.378 0.000 5717.147 1.5e+04
BsmtExposure_Av 1522.1174 1830.707 0.831 0.406 -2074.293 5118.527
BsmtExposure_Gd 1.595e+04 2774.556 5.748 0.000 1.05e+04 2.14e+04
BsmtExposure_Mn -1265.4973 2423.751 -0.522 0.602 -6026.938 3495.943
BsmtExposure_No -90.3296 1655.409 -0.055 0.957 -3342.368 3161.709
BsmtFinType1_ALQ 4633.0114 2010.302 2.305 0.022 683.787 8582.236
BsmtFinType1_BLQ -1012.8177 2361.228 -0.429 0.668 -5651.433 3625.798
BsmtFinType1_GLQ 1.22e+04 2134.104 5.715 0.000 8005.000 1.64e+04
BsmtFinType1_LwQ -3944.2561 3366.343 -1.172 0.242 -1.06e+04 2668.901
BsmtFinType1_Rec 1816.3413 2575.133 0.705 0.481 -3242.488 6875.171
BsmtFinType1_Unf 2424.6528 2193.495 1.105 0.269 -1884.453 6733.758
BsmtFinType2_ALQ 1.557e+04 1.33e+04 1.173 0.241 -1.05e+04 4.16e+04
BsmtFinType2_BLQ -1.019e+04 5873.208 -1.736 0.083 -2.17e+04 1343.232
BsmtFinType2_GLQ 1.653e+04 9778.485 1.690 0.092 -2684.132 3.57e+04
BsmtFinType2_LwQ -4642.6081 4903.219 -0.947 0.344 -1.43e+04 4989.730
BsmtFinType2_Rec -7823.9010 5531.896 -1.414 0.158 -1.87e+04 3043.469
BsmtFinType2_Unf 6677.0985 6092.770 1.096 0.274 -5292.105 1.86e+04
Heating_Floor 0 0 nan nan 0 0
Heating_GasA -6587.0482 8170.722 -0.806 0.421 -2.26e+04 9464.277
Heating_GasW -1.335e+04 1.15e+04 -1.163 0.245 -3.59e+04 9201.280
Heating_Grav 1.403e+04 1.4e+04 1.002 0.317 -1.35e+04 4.15e+04
Heating_OthW 0 0 nan nan 0 0
Heating_Wall 2.202e+04 1.67e+04 1.316 0.189 -1.09e+04 5.49e+04
HeatingQC_Ex 3958.7241 2018.545 1.961 0.050 -6.693 7924.141
HeatingQC_Fa 3183.5497 4013.857 0.793 0.428 -4701.644 1.11e+04
HeatingQC_Gd 4388.2889 2102.819 2.087 0.037 257.316 8519.262
HeatingQC_Po 0 0 nan nan 0 0
HeatingQC_TA 4583.8002 1906.134 2.405 0.017 839.214 8328.386
CentralAir_N 9107.5930 2761.543 3.298 0.001 3682.562 1.45e+04
CentralAir_Y 7006.7699 2846.041 2.462 0.014 1415.743 1.26e+04
Electrical_FuseA 8272.3678 6295.764 1.314 0.189 -4095.615 2.06e+04
Electrical_FuseF 953.3996 7169.303 0.133 0.894 -1.31e+04 1.5e+04
Electrical_FuseP 1879.5518 1.73e+04 0.109 0.914 -3.21e+04 3.59e+04
Electrical_Mix 0 0 nan nan 0 0
Electrical_SBrkr 5009.0436 6224.558 0.805 0.421 -7219.056 1.72e+04
KitchenQual_Ex 1.796e+04 3521.980 5.100 0.000 1.1e+04 2.49e+04
KitchenQual_Fa 272.7772 4734.317 0.058 0.954 -9027.755 9573.309
KitchenQual_Gd -1020.6401 2320.748 -0.440 0.660 -5579.733 3538.453
KitchenQual_TA -1101.2641 2187.552 -0.503 0.615 -5398.694 3196.166
Functional_Maj1 -5909.9604 1.1e+04 -0.536 0.592 -2.76e+04 1.58e+04
Functional_Maj2 7311.6884 2.09e+04 0.351 0.726 -3.37e+04 4.83e+04
Functional_Min1 2773.6904 6744.581 0.411 0.681 -1.05e+04 1.6e+04
Functional_Min2 4538.7546 6658.493 0.682 0.496 -8541.807 1.76e+04
Functional_Mod -9981.1420 9461.133 -1.055 0.292 -2.86e+04 8605.187
Functional_Sev 0 0 nan nan 0 0
Functional_Typ 1.738e+04 5017.224 3.464 0.001 7525.031 2.72e+04
FireplaceQu_Ex -1.101e+04 7102.582 -1.550 0.122 -2.5e+04 2942.254
FireplaceQu_Fa -2359.1252 4643.678 -0.508 0.612 -1.15e+04 6763.347
FireplaceQu_Gd 6794.2601 2725.762 2.493 0.013 1439.520 1.21e+04
FireplaceQu_Po 1.938e+04 5454.613 3.553 0.000 8661.996 3.01e+04
FireplaceQu_TA 3312.3996 2772.225 1.195 0.233 -2133.616 8758.415
GarageType_2Types 0 0 nan nan 0 0
GarageType_Attchd -4931.1936 4200.811 -1.174 0.241 -1.32e+04 3321.269
GarageType_Basment -2229.4607 6763.426 -0.330 0.742 -1.55e+04 1.11e+04
GarageType_BuiltIn -2776.8124 5091.721 -0.545 0.586 -1.28e+04 7225.836
GarageType_CarPort 2.801e+04 1.49e+04 1.880 0.061 -1251.668 5.73e+04
GarageType_Detchd -1957.7472 4342.540 -0.451 0.652 -1.05e+04 6573.141
GarageFinish_Fin 3640.9994 1548.627 2.351 0.019 598.733 6683.266
GarageFinish_RFn 6731.2640 1477.132 4.557 0.000 3829.450 9633.079
GarageFinish_Unf 5742.0994 1736.538 3.307 0.001 2330.683 9153.516
GarageQual_Ex 7127.8693 6099.030 1.169 0.243 -4853.632 1.91e+04
GarageQual_Fa 2101.8684 5578.248 0.377 0.706 -8856.561 1.31e+04
GarageQual_Gd 609.3395 7658.405 0.080 0.937 -1.44e+04 1.57e+04
GarageQual_Po 0 0 nan nan 0 0
GarageQual_TA 6275.2856 4051.434 1.549 0.122 -1683.728 1.42e+04
GarageCond_Ex 7127.8693 6099.030 1.169 0.243 -4853.632 1.91e+04
GarageCond_Fa -4718.4580 8188.980 -0.576 0.565 -2.08e+04 1.14e+04
GarageCond_Gd 1.587e+04 1.4e+04 1.135 0.257 -1.16e+04 4.34e+04
GarageCond_Po -3215.0442 2.16e+04 -0.149 0.882 -4.56e+04 3.92e+04
GarageCond_TA 1048.3214 7475.140 0.140 0.889 -1.36e+04 1.57e+04
PavedDrive_N 9346.5553 3438.231 2.718 0.007 2592.175 1.61e+04
PavedDrive_P -953.1221 4757.017 -0.200 0.841 -1.03e+04 8392.003
PavedDrive_Y 7720.9297 3106.094 2.486 0.013 1619.030 1.38e+04
Fence_GdPrv 5947.7226 4219.920 1.409 0.159 -2342.279 1.42e+04
Fence_GdWo 9617.4168 3709.081 2.593 0.010 2330.954 1.69e+04
Fence_MnPrv 7229.1092 2717.115 2.661 0.008 1891.357 1.26e+04
Fence_MnWw -6679.8858 6636.636 -1.007 0.315 -1.97e+04 6357.738
SaleType_COD -1618.4116 6526.789 -0.248 0.804 -1.44e+04 1.12e+04
SaleType_CWD 5.253e+04 1.26e+04 4.168 0.000 2.78e+04 7.73e+04
SaleType_Con 5743.7135 1.63e+04 0.353 0.724 -2.62e+04 3.77e+04
SaleType_ConLD -7598.1329 1.07e+04 -0.711 0.477 -2.86e+04 1.34e+04
SaleType_ConLI -4069.6281 1.81e+04 -0.225 0.822 -3.96e+04 3.15e+04
SaleType_ConLw -9888.1004 1.32e+04 -0.748 0.455 -3.59e+04 1.61e+04
SaleType_New -2.665e+04 1.3e+04 -2.049 0.041 -5.22e+04 -1102.959
SaleType_Oth 6907.1675 1.21e+04 0.571 0.568 -1.69e+04 3.07e+04
SaleType_WD 763.1038 4724.799 0.162 0.872 -8518.729 1e+04
SaleCondition_Abnorml -5180.2760 5587.306 -0.927 0.354 -1.62e+04 5795.946
SaleCondition_AdjLand 2.61e+04 1.79e+04 1.460 0.145 -9017.280 6.12e+04
SaleCondition_Alloca -4.918e+04 1.71e+04 -2.877 0.004 -8.28e+04 -1.56e+04
SaleCondition_Family 8611.0939 7932.263 1.086 0.278 -6971.780 2.42e+04
SaleCondition_Normal 107.0441 5400.131 0.020 0.984 -1.05e+04 1.07e+04
SaleCondition_Partial 3.566e+04 1.3e+04 2.734 0.006 1e+04 6.13e+04
Omnibus: 136.128 Durbin-Watson: 1.911
Prob(Omnibus): 0.000 Jarque-Bera (JB): 2761.370
Skew: 0.089 Prob(JB): 0.00
Kurtosis: 12.553 Cond. No. 1.25e+16


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 1.27e-28. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
In [57]:
rsq=round(model.rsquared_adj,4)
In [58]:
rsq
Out[58]:
0.9346
In [59]:
model.pvalues.sort_values()
Out[59]:
Neighborhood_StoneBr    5.352529e-18
KitchenAbvGr            3.439900e-11
Utilities_AllPub        3.439900e-11
BsmtHalfBath            3.439900e-11
BsmtExposure_Gd         1.533103e-08
                            ...     
HeatingQC_Po                     NaN
Electrical_Mix                   NaN
Functional_Sev                   NaN
GarageType_2Types                NaN
GarageQual_Po                    NaN
Length: 276, dtype: float64
In [60]:
col_to_drop=model.pvalues.sort_values().index[-1]
In [237]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)

print('column to drop',col_to_drop)
Adjusted r_squared 0.9406
column to drop Exterior2nd_CBlock
In [238]:
Xnew.columns
Out[238]:
Index(['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt',
       'BsmtFinSF1', 'BsmtFinSF2', 'TotalBsmtSF', '2ndFlrSF', 'GrLivArea',
       ...
       'GarageQual_TA', 'GarageCond_Fa', 'GarageCond_Gd', 'Fence_GdPrv',
       'Fence_GdWo', 'Fence_MnPrv', 'SaleType_CWD', 'SaleCondition_Abnorml',
       'SaleCondition_Alloca', 'SaleCondition_Partial'],
      dtype='object', length=113)
In [296]:
Xnew.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1038 entries, 0 to 1037
Columns: 113 entries, MSSubClass to SaleCondition_Partial
dtypes: float64(18), uint8(95)
memory usage: 242.4 KB
In [316]:
Xnew=Xnew.drop(labels=['Exterior1st_ImStucc','HouseStyle_2.5Fin'],axis=1)
In [318]:
Xnew.columns
Out[318]:
Index(['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt',
       'BsmtFinSF1', 'BsmtFinSF2', 'TotalBsmtSF', '2ndFlrSF', 'GrLivArea',
       ...
       'GarageQual_TA', 'GarageCond_Fa', 'GarageCond_Gd', 'Fence_GdPrv',
       'Fence_GdWo', 'Fence_MnPrv', 'SaleType_CWD', 'SaleCondition_Abnorml',
       'SaleCondition_Alloca', 'SaleCondition_Partial'],
      dtype='object', length=111)

Applying linear regression¶

In [319]:
from sklearn.linear_model import LinearRegression
In [320]:
lm=LinearRegression()
In [321]:
model=lm.fit(xtrain,ytrain)
In [322]:
tr_pred=model.predict(xtrain)
In [323]:
ts_pred=model.predict(xtest)
In [324]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
In [325]:
tr_err=mean_squared_error(ytrain,tr_pred)
ts_err=mean_squared_error(ytest,ts_pred)
tr_ab=mean_absolute_error(ytrain,tr_pred)
ts_ab=mean_absolute_error(ytest,ts_pred)

print('train_err',tr_err)
print('test_err',ts_err)
print('train_ab',tr_ab)
print('test_ab',ts_ab)
train_err 228448012.2987952
test_err 4.273445948639171e+26
train_ab 10649.578313253012
test_ab 1433367142066.1394
In [326]:
len('Xtrain')
Out[326]:
6

Model is Overfit (Training Error is less than Testing Error)¶

In [327]:
from sklearn.linear_model import Ridge
In [328]:
rr=Ridge(alpha=23)
model=rr.fit(xtrain,ytrain)
tr_pred=model.predict(xtrain)
ts_pred=model.predict(xtest)

tr_err=mean_squared_error(ytrain,tr_pred)
ts_err=mean_squared_error(ytest,ts_pred)
tr_ab=mean_absolute_error(ytrain,tr_pred)
ts_ab=mean_absolute_error(ytest,ts_pred)

print('train_err',tr_err)
print('test_err',ts_err)
print('train_ab',tr_ab)
print('test_ab',ts_ab)
train_err 306758235.53366125
test_err 453827951.64898556
train_ab 12176.956117645803
test_ab 14044.049405250164
In [329]:
w=[]
e=0.01
for i in range(0,1000,1):
    w.append(e)
    e=round(e+0.01,2)
In [330]:
w
Out[330]:
[0.01,
 0.02,
 0.03,
 0.04,
 0.05,
 0.06,
 0.07,
 0.08,
 0.09,
 0.1,
 0.11,
 0.12,
 0.13,
 0.14,
 0.15,
 0.16,
 0.17,
 0.18,
 0.19,
 0.2,
 0.21,
 0.22,
 0.23,
 0.24,
 0.25,
 0.26,
 0.27,
 0.28,
 0.29,
 0.3,
 0.31,
 0.32,
 0.33,
 0.34,
 0.35,
 0.36,
 0.37,
 0.38,
 0.39,
 0.4,
 0.41,
 0.42,
 0.43,
 0.44,
 0.45,
 0.46,
 0.47,
 0.48,
 0.49,
 0.5,
 0.51,
 0.52,
 0.53,
 0.54,
 0.55,
 0.56,
 0.57,
 0.58,
 0.59,
 0.6,
 0.61,
 0.62,
 0.63,
 0.64,
 0.65,
 0.66,
 0.67,
 0.68,
 0.69,
 0.7,
 0.71,
 0.72,
 0.73,
 0.74,
 0.75,
 0.76,
 0.77,
 0.78,
 0.79,
 0.8,
 0.81,
 0.82,
 0.83,
 0.84,
 0.85,
 0.86,
 0.87,
 0.88,
 0.89,
 0.9,
 0.91,
 0.92,
 0.93,
 0.94,
 0.95,
 0.96,
 0.97,
 0.98,
 0.99,
 1.0,
 1.01,
 1.02,
 1.03,
 1.04,
 1.05,
 1.06,
 1.07,
 1.08,
 1.09,
 1.1,
 1.11,
 1.12,
 1.13,
 1.14,
 1.15,
 1.16,
 1.17,
 1.18,
 1.19,
 1.2,
 1.21,
 1.22,
 1.23,
 1.24,
 1.25,
 1.26,
 1.27,
 1.28,
 1.29,
 1.3,
 1.31,
 1.32,
 1.33,
 1.34,
 1.35,
 1.36,
 1.37,
 1.38,
 1.39,
 1.4,
 1.41,
 1.42,
 1.43,
 1.44,
 1.45,
 1.46,
 1.47,
 1.48,
 1.49,
 1.5,
 1.51,
 1.52,
 1.53,
 1.54,
 1.55,
 1.56,
 1.57,
 1.58,
 1.59,
 1.6,
 1.61,
 1.62,
 1.63,
 1.64,
 1.65,
 1.66,
 1.67,
 1.68,
 1.69,
 1.7,
 1.71,
 1.72,
 1.73,
 1.74,
 1.75,
 1.76,
 1.77,
 1.78,
 1.79,
 1.8,
 1.81,
 1.82,
 1.83,
 1.84,
 1.85,
 1.86,
 1.87,
 1.88,
 1.89,
 1.9,
 1.91,
 1.92,
 1.93,
 1.94,
 1.95,
 1.96,
 1.97,
 1.98,
 1.99,
 2.0,
 2.01,
 2.02,
 2.03,
 2.04,
 2.05,
 2.06,
 2.07,
 2.08,
 2.09,
 2.1,
 2.11,
 2.12,
 2.13,
 2.14,
 2.15,
 2.16,
 2.17,
 2.18,
 2.19,
 2.2,
 2.21,
 2.22,
 2.23,
 2.24,
 2.25,
 2.26,
 2.27,
 2.28,
 2.29,
 2.3,
 2.31,
 2.32,
 2.33,
 2.34,
 2.35,
 2.36,
 2.37,
 2.38,
 2.39,
 2.4,
 2.41,
 2.42,
 2.43,
 2.44,
 2.45,
 2.46,
 2.47,
 2.48,
 2.49,
 2.5,
 2.51,
 2.52,
 2.53,
 2.54,
 2.55,
 2.56,
 2.57,
 2.58,
 2.59,
 2.6,
 2.61,
 2.62,
 2.63,
 2.64,
 2.65,
 2.66,
 2.67,
 2.68,
 2.69,
 2.7,
 2.71,
 2.72,
 2.73,
 2.74,
 2.75,
 2.76,
 2.77,
 2.78,
 2.79,
 2.8,
 2.81,
 2.82,
 2.83,
 2.84,
 2.85,
 2.86,
 2.87,
 2.88,
 2.89,
 2.9,
 2.91,
 2.92,
 2.93,
 2.94,
 2.95,
 2.96,
 2.97,
 2.98,
 2.99,
 3.0,
 3.01,
 3.02,
 3.03,
 3.04,
 3.05,
 3.06,
 3.07,
 3.08,
 3.09,
 3.1,
 3.11,
 3.12,
 3.13,
 3.14,
 3.15,
 3.16,
 3.17,
 3.18,
 3.19,
 3.2,
 3.21,
 3.22,
 3.23,
 3.24,
 3.25,
 3.26,
 3.27,
 3.28,
 3.29,
 3.3,
 3.31,
 3.32,
 3.33,
 3.34,
 3.35,
 3.36,
 3.37,
 3.38,
 3.39,
 3.4,
 3.41,
 3.42,
 3.43,
 3.44,
 3.45,
 3.46,
 3.47,
 3.48,
 3.49,
 3.5,
 3.51,
 3.52,
 3.53,
 3.54,
 3.55,
 3.56,
 3.57,
 3.58,
 3.59,
 3.6,
 3.61,
 3.62,
 3.63,
 3.64,
 3.65,
 3.66,
 3.67,
 3.68,
 3.69,
 3.7,
 3.71,
 3.72,
 3.73,
 3.74,
 3.75,
 3.76,
 3.77,
 3.78,
 3.79,
 3.8,
 3.81,
 3.82,
 3.83,
 3.84,
 3.85,
 3.86,
 3.87,
 3.88,
 3.89,
 3.9,
 3.91,
 3.92,
 3.93,
 3.94,
 3.95,
 3.96,
 3.97,
 3.98,
 3.99,
 4.0,
 4.01,
 4.02,
 4.03,
 4.04,
 4.05,
 4.06,
 4.07,
 4.08,
 4.09,
 4.1,
 4.11,
 4.12,
 4.13,
 4.14,
 4.15,
 4.16,
 4.17,
 4.18,
 4.19,
 4.2,
 4.21,
 4.22,
 4.23,
 4.24,
 4.25,
 4.26,
 4.27,
 4.28,
 4.29,
 4.3,
 4.31,
 4.32,
 4.33,
 4.34,
 4.35,
 4.36,
 4.37,
 4.38,
 4.39,
 4.4,
 4.41,
 4.42,
 4.43,
 4.44,
 4.45,
 4.46,
 4.47,
 4.48,
 4.49,
 4.5,
 4.51,
 4.52,
 4.53,
 4.54,
 4.55,
 4.56,
 4.57,
 4.58,
 4.59,
 4.6,
 4.61,
 4.62,
 4.63,
 4.64,
 4.65,
 4.66,
 4.67,
 4.68,
 4.69,
 4.7,
 4.71,
 4.72,
 4.73,
 4.74,
 4.75,
 4.76,
 4.77,
 4.78,
 4.79,
 4.8,
 4.81,
 4.82,
 4.83,
 4.84,
 4.85,
 4.86,
 4.87,
 4.88,
 4.89,
 4.9,
 4.91,
 4.92,
 4.93,
 4.94,
 4.95,
 4.96,
 4.97,
 4.98,
 4.99,
 5.0,
 5.01,
 5.02,
 5.03,
 5.04,
 5.05,
 5.06,
 5.07,
 5.08,
 5.09,
 5.1,
 5.11,
 5.12,
 5.13,
 5.14,
 5.15,
 5.16,
 5.17,
 5.18,
 5.19,
 5.2,
 5.21,
 5.22,
 5.23,
 5.24,
 5.25,
 5.26,
 5.27,
 5.28,
 5.29,
 5.3,
 5.31,
 5.32,
 5.33,
 5.34,
 5.35,
 5.36,
 5.37,
 5.38,
 5.39,
 5.4,
 5.41,
 5.42,
 5.43,
 5.44,
 5.45,
 5.46,
 5.47,
 5.48,
 5.49,
 5.5,
 5.51,
 5.52,
 5.53,
 5.54,
 5.55,
 5.56,
 5.57,
 5.58,
 5.59,
 5.6,
 5.61,
 5.62,
 5.63,
 5.64,
 5.65,
 5.66,
 5.67,
 5.68,
 5.69,
 5.7,
 5.71,
 5.72,
 5.73,
 5.74,
 5.75,
 5.76,
 5.77,
 5.78,
 5.79,
 5.8,
 5.81,
 5.82,
 5.83,
 5.84,
 5.85,
 5.86,
 5.87,
 5.88,
 5.89,
 5.9,
 5.91,
 5.92,
 5.93,
 5.94,
 5.95,
 5.96,
 5.97,
 5.98,
 5.99,
 6.0,
 6.01,
 6.02,
 6.03,
 6.04,
 6.05,
 6.06,
 6.07,
 6.08,
 6.09,
 6.1,
 6.11,
 6.12,
 6.13,
 6.14,
 6.15,
 6.16,
 6.17,
 6.18,
 6.19,
 6.2,
 6.21,
 6.22,
 6.23,
 6.24,
 6.25,
 6.26,
 6.27,
 6.28,
 6.29,
 6.3,
 6.31,
 6.32,
 6.33,
 6.34,
 6.35,
 6.36,
 6.37,
 6.38,
 6.39,
 6.4,
 6.41,
 6.42,
 6.43,
 6.44,
 6.45,
 6.46,
 6.47,
 6.48,
 6.49,
 6.5,
 6.51,
 6.52,
 6.53,
 6.54,
 6.55,
 6.56,
 6.57,
 6.58,
 6.59,
 6.6,
 6.61,
 6.62,
 6.63,
 6.64,
 6.65,
 6.66,
 6.67,
 6.68,
 6.69,
 6.7,
 6.71,
 6.72,
 6.73,
 6.74,
 6.75,
 6.76,
 6.77,
 6.78,
 6.79,
 6.8,
 6.81,
 6.82,
 6.83,
 6.84,
 6.85,
 6.86,
 6.87,
 6.88,
 6.89,
 6.9,
 6.91,
 6.92,
 6.93,
 6.94,
 6.95,
 6.96,
 6.97,
 6.98,
 6.99,
 7.0,
 7.01,
 7.02,
 7.03,
 7.04,
 7.05,
 7.06,
 7.07,
 7.08,
 7.09,
 7.1,
 7.11,
 7.12,
 7.13,
 7.14,
 7.15,
 7.16,
 7.17,
 7.18,
 7.19,
 7.2,
 7.21,
 7.22,
 7.23,
 7.24,
 7.25,
 7.26,
 7.27,
 7.28,
 7.29,
 7.3,
 7.31,
 7.32,
 7.33,
 7.34,
 7.35,
 7.36,
 7.37,
 7.38,
 7.39,
 7.4,
 7.41,
 7.42,
 7.43,
 7.44,
 7.45,
 7.46,
 7.47,
 7.48,
 7.49,
 7.5,
 7.51,
 7.52,
 7.53,
 7.54,
 7.55,
 7.56,
 7.57,
 7.58,
 7.59,
 7.6,
 7.61,
 7.62,
 7.63,
 7.64,
 7.65,
 7.66,
 7.67,
 7.68,
 7.69,
 7.7,
 7.71,
 7.72,
 7.73,
 7.74,
 7.75,
 7.76,
 7.77,
 7.78,
 7.79,
 7.8,
 7.81,
 7.82,
 7.83,
 7.84,
 7.85,
 7.86,
 7.87,
 7.88,
 7.89,
 7.9,
 7.91,
 7.92,
 7.93,
 7.94,
 7.95,
 7.96,
 7.97,
 7.98,
 7.99,
 8.0,
 8.01,
 8.02,
 8.03,
 8.04,
 8.05,
 8.06,
 8.07,
 8.08,
 8.09,
 8.1,
 8.11,
 8.12,
 8.13,
 8.14,
 8.15,
 8.16,
 8.17,
 8.18,
 8.19,
 8.2,
 8.21,
 8.22,
 8.23,
 8.24,
 8.25,
 8.26,
 8.27,
 8.28,
 8.29,
 8.3,
 8.31,
 8.32,
 8.33,
 8.34,
 8.35,
 8.36,
 8.37,
 8.38,
 8.39,
 8.4,
 8.41,
 8.42,
 8.43,
 8.44,
 8.45,
 8.46,
 8.47,
 8.48,
 8.49,
 8.5,
 8.51,
 8.52,
 8.53,
 8.54,
 8.55,
 8.56,
 8.57,
 8.58,
 8.59,
 8.6,
 8.61,
 8.62,
 8.63,
 8.64,
 8.65,
 8.66,
 8.67,
 8.68,
 8.69,
 8.7,
 8.71,
 8.72,
 8.73,
 8.74,
 8.75,
 8.76,
 8.77,
 8.78,
 8.79,
 8.8,
 8.81,
 8.82,
 8.83,
 8.84,
 8.85,
 8.86,
 8.87,
 8.88,
 8.89,
 8.9,
 8.91,
 8.92,
 8.93,
 8.94,
 8.95,
 8.96,
 8.97,
 8.98,
 8.99,
 9.0,
 9.01,
 9.02,
 9.03,
 9.04,
 9.05,
 9.06,
 9.07,
 9.08,
 9.09,
 9.1,
 9.11,
 9.12,
 9.13,
 9.14,
 9.15,
 9.16,
 9.17,
 9.18,
 9.19,
 9.2,
 9.21,
 9.22,
 9.23,
 9.24,
 9.25,
 9.26,
 9.27,
 9.28,
 9.29,
 9.3,
 9.31,
 9.32,
 9.33,
 9.34,
 9.35,
 9.36,
 9.37,
 9.38,
 9.39,
 9.4,
 9.41,
 9.42,
 9.43,
 9.44,
 9.45,
 9.46,
 9.47,
 9.48,
 9.49,
 9.5,
 9.51,
 9.52,
 9.53,
 9.54,
 9.55,
 9.56,
 9.57,
 9.58,
 9.59,
 9.6,
 9.61,
 9.62,
 9.63,
 9.64,
 9.65,
 9.66,
 9.67,
 9.68,
 9.69,
 9.7,
 9.71,
 9.72,
 9.73,
 9.74,
 9.75,
 9.76,
 9.77,
 9.78,
 9.79,
 9.8,
 9.81,
 9.82,
 9.83,
 9.84,
 9.85,
 9.86,
 9.87,
 9.88,
 9.89,
 9.9,
 9.91,
 9.92,
 9.93,
 9.94,
 9.95,
 9.96,
 9.97,
 9.98,
 9.99,
 10.0]
In [331]:
rr=Ridge()
tg={'alpha':w}

from sklearn.model_selection import GridSearchCV

cv=GridSearchCV(rr,tg,scoring='neg_mean_absolute_error',cv=4)
cvmodel=cv.fit(Xnew,Y)
In [332]:
cvmodel.best_params_
Out[332]:
{'alpha': 3.09}
In [333]:
rr=Ridge(alpha=3.09)
model=rr.fit(xtrain,ytrain)
tr_pred=model.predict(xtrain)
ts_pred=model.predict(xtest)

tr_err=mean_squared_error(ytrain,tr_pred)
ts_err=mean_squared_error(ytest,ts_pred)
tr_ab=mean_absolute_error(ytrain,tr_pred)
ts_ab=mean_absolute_error(ytest,ts_pred)

print('train_err',tr_err)
print('test_err',ts_err)
print('train_ab',tr_ab)
print('test_ab',ts_ab)
train_err 244994310.80139524
test_err 443235239.3342266
train_ab 10911.75380887483
test_ab 13737.790741817766
In [334]:
cvmodel.best_estimator_
Out[334]:
Ridge(alpha=3.09)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Ridge(alpha=3.09)

Testing DataSet¶

In [335]:
df2=pd.read_csv('Downloads/testing_set.csv')
In [336]:
df2
Out[336]:
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities ... ScreenPorch PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition
0 1461 20 RH 80.0 11622 Pave NaN Reg Lvl AllPub ... 120 0 NaN MnPrv NaN 0 6 2010 WD Normal
1 1462 20 RL 81.0 14267 Pave NaN IR1 Lvl AllPub ... 0 0 NaN NaN Gar2 12500 6 2010 WD Normal
2 1463 60 RL 74.0 13830 Pave NaN IR1 Lvl AllPub ... 0 0 NaN MnPrv NaN 0 3 2010 WD Normal
3 1464 60 RL 78.0 9978 Pave NaN IR1 Lvl AllPub ... 0 0 NaN NaN NaN 0 6 2010 WD Normal
4 1465 120 RL 43.0 5005 Pave NaN IR1 HLS AllPub ... 144 0 NaN NaN NaN 0 1 2010 WD Normal
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 2915 160 RM 21.0 1936 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 6 2006 WD Normal
1455 2916 160 RM 21.0 1894 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 4 2006 WD Abnorml
1456 2917 20 RL 160.0 20000 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 9 2006 WD Abnorml
1457 2918 85 RL 62.0 10441 Pave NaN Reg Lvl AllPub ... 0 0 NaN MnPrv Shed 700 7 2006 WD Normal
1458 2919 60 RL 74.0 9627 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 11 2006 WD Normal

1459 rows × 80 columns

In [337]:
df2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1459 non-null   int64  
 1   MSSubClass     1459 non-null   int64  
 2   MSZoning       1455 non-null   object 
 3   LotFrontage    1232 non-null   float64
 4   LotArea        1459 non-null   int64  
 5   Street         1459 non-null   object 
 6   Alley          107 non-null    object 
 7   LotShape       1459 non-null   object 
 8   LandContour    1459 non-null   object 
 9   Utilities      1457 non-null   object 
 10  LotConfig      1459 non-null   object 
 11  LandSlope      1459 non-null   object 
 12  Neighborhood   1459 non-null   object 
 13  Condition1     1459 non-null   object 
 14  Condition2     1459 non-null   object 
 15  BldgType       1459 non-null   object 
 16  HouseStyle     1459 non-null   object 
 17  OverallQual    1459 non-null   int64  
 18  OverallCond    1459 non-null   int64  
 19  YearBuilt      1459 non-null   int64  
 20  YearRemodAdd   1459 non-null   int64  
 21  RoofStyle      1459 non-null   object 
 22  RoofMatl       1459 non-null   object 
 23  Exterior1st    1458 non-null   object 
 24  Exterior2nd    1458 non-null   object 
 25  MasVnrType     1443 non-null   object 
 26  MasVnrArea     1444 non-null   float64
 27  ExterQual      1459 non-null   object 
 28  ExterCond      1459 non-null   object 
 29  Foundation     1459 non-null   object 
 30  BsmtQual       1415 non-null   object 
 31  BsmtCond       1414 non-null   object 
 32  BsmtExposure   1415 non-null   object 
 33  BsmtFinType1   1417 non-null   object 
 34  BsmtFinSF1     1458 non-null   float64
 35  BsmtFinType2   1417 non-null   object 
 36  BsmtFinSF2     1458 non-null   float64
 37  BsmtUnfSF      1458 non-null   float64
 38  TotalBsmtSF    1458 non-null   float64
 39  Heating        1459 non-null   object 
 40  HeatingQC      1459 non-null   object 
 41  CentralAir     1459 non-null   object 
 42  Electrical     1459 non-null   object 
 43  1stFlrSF       1459 non-null   int64  
 44  2ndFlrSF       1459 non-null   int64  
 45  LowQualFinSF   1459 non-null   int64  
 46  GrLivArea      1459 non-null   int64  
 47  BsmtFullBath   1457 non-null   float64
 48  BsmtHalfBath   1457 non-null   float64
 49  FullBath       1459 non-null   int64  
 50  HalfBath       1459 non-null   int64  
 51  BedroomAbvGr   1459 non-null   int64  
 52  KitchenAbvGr   1459 non-null   int64  
 53  KitchenQual    1458 non-null   object 
 54  TotRmsAbvGrd   1459 non-null   int64  
 55  Functional     1457 non-null   object 
 56  Fireplaces     1459 non-null   int64  
 57  FireplaceQu    729 non-null    object 
 58  GarageType     1383 non-null   object 
 59  GarageYrBlt    1381 non-null   float64
 60  GarageFinish   1381 non-null   object 
 61  GarageCars     1458 non-null   float64
 62  GarageArea     1458 non-null   float64
 63  GarageQual     1381 non-null   object 
 64  GarageCond     1381 non-null   object 
 65  PavedDrive     1459 non-null   object 
 66  WoodDeckSF     1459 non-null   int64  
 67  OpenPorchSF    1459 non-null   int64  
 68  EnclosedPorch  1459 non-null   int64  
 69  3SsnPorch      1459 non-null   int64  
 70  ScreenPorch    1459 non-null   int64  
 71  PoolArea       1459 non-null   int64  
 72  PoolQC         3 non-null      object 
 73  Fence          290 non-null    object 
 74  MiscFeature    51 non-null     object 
 75  MiscVal        1459 non-null   int64  
 76  MoSold         1459 non-null   int64  
 77  YrSold         1459 non-null   int64  
 78  SaleType       1458 non-null   object 
 79  SaleCondition  1459 non-null   object 
dtypes: float64(11), int64(26), object(43)
memory usage: 912.0+ KB
In [338]:
df2.shape
Out[338]:
(1459, 80)

Filling Null Values¶

In [339]:
df2.isna().sum()
Out[339]:
Id                 0
MSSubClass         0
MSZoning           4
LotFrontage      227
LotArea            0
                ... 
MiscVal            0
MoSold             0
YrSold             0
SaleType           1
SaleCondition      0
Length: 80, dtype: int64
In [340]:
for i in df2.columns:
        if (df2[i].isna().sum())>0:
            if df2[i].dtypes=='object':
                x=df2[i].mode()[0]
                df2[i]=df2[i].fillna(x)
            else:
                x=df2[i].mean()
                df2[i]=df2[i].fillna(x)
In [341]:
df2.isna().sum()
Out[341]:
Id               0
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
                ..
MiscVal          0
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
Length: 80, dtype: int64

Dropping Irrelevant Coloumns¶

Seperating Input & Output Features¶

In [342]:
Xts=df2.drop(labels=['Id','LowQualFinSF','MiscVal'],axis=1)
In [343]:
Xts.shape
Out[343]:
(1459, 77)

Seperating Categorical and Continous Coloumns¶

In [344]:
cat_ts=[]
con_ts=[]
for i in Xts.columns:
    if Xts[i].dtypes=='object':
        cat_ts.append(i)
    else:
        con_ts.append(i)
print(cat_ts)
print(con_ts)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
In [345]:
cat_ts=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 
        'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 
        'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 
        'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 
        'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 
        'SaleCondition']
con_ts=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 
        'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 
        'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 
        'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 
        'MoSold', 'YrSold']

Standardisation¶

In [346]:
from sklearn.preprocessing import StandardScaler
ss=StandardScaler()
In [347]:
X1ts=pd.DataFrame(ss.fit_transform(Xts[con_ts]),columns=con_ts)
In [348]:
X1ts
Out[348]:
MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 ... GarageCars GarageArea WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MoSold YrSold
0 -0.874711 0.555587 0.363929 -0.751101 0.400766 -0.340945 -1.072885 -0.570108 0.063295 0.517348 ... -0.988013 1.185945 0.366678 -0.701628 -0.360738 -0.088827 1.818960 -0.057227 -0.038281 1.713905
1 -0.874711 0.604239 0.897861 -0.054877 0.400766 -0.439695 -1.214908 0.041273 1.063392 -0.297903 ... -0.988013 -0.741213 2.347867 -0.178826 -0.360738 -0.088827 -0.301543 -0.057227 -0.038281 1.713905
2 0.061351 0.263676 0.809646 -0.751101 -0.497418 0.844059 0.678742 -0.570108 0.773254 -0.297903 ... 0.301623 0.042559 0.930495 -0.207871 -0.360738 -0.088827 -0.301543 -0.057227 -1.140614 1.713905
3 0.061351 0.458284 0.032064 -0.054877 0.400766 0.876976 0.678742 -0.456889 0.357829 -0.297903 ... 0.301623 -0.012766 2.089451 -0.178826 -0.360738 -0.088827 -0.301543 -0.057227 -0.038281 1.713905
4 1.465443 -1.244533 -0.971808 1.337571 -0.497418 0.679475 0.394694 -0.570108 -0.387298 -0.297903 ... 0.301623 0.153210 -0.729632 0.489198 -0.360738 -0.088827 2.243060 -0.057227 -1.875504 1.713905
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 2.401505 -2.314875 -1.591330 -1.447325 1.298950 -0.044694 -0.646813 -0.570108 -0.965376 -0.297903 ... -2.277648 -2.179665 -0.729632 -0.701628 -0.360738 -0.088827 -0.301543 -0.057227 -0.038281 -1.359958
1455 2.401505 -2.314875 -1.599808 -1.447325 -0.497418 -0.044694 -0.646813 -0.570108 -0.411477 -0.297903 ... -0.988013 -0.861084 -0.729632 -0.353093 -0.360738 -0.088827 -0.301543 -0.057227 -0.773170 -1.359958
1456 -0.874711 4.447740 2.055150 -0.751101 1.298950 -0.373861 0.584059 -0.570108 1.724994 -0.297903 ... 0.301623 0.475939 2.982161 -0.701628 -0.360738 -0.088827 -0.301543 -0.057227 1.064053 -1.359958
1457 0.646389 -0.320147 0.125527 -0.751101 -0.497418 0.679475 0.394694 -0.570108 -0.224645 -0.297903 ... -2.277648 -2.179665 -0.103169 -0.236915 -0.360738 -0.088827 -0.301543 -0.057227 0.329164 -1.359958
1458 0.061351 0.263676 -0.038790 0.641347 -0.497418 0.712392 0.489377 -0.037980 0.700719 -0.297903 ... 1.591258 0.817111 0.758218 -0.004559 -0.360738 -0.088827 -0.301543 -0.057227 1.798942 -1.359958

1459 rows × 34 columns

Encoding ( converting Categorical cloumns in continous)¶

ONE HOT ENCODING¶

In [349]:
X2ts=pd.get_dummies(Xts[cat_ts])
In [350]:
X2ts
Out[350]:
MSZoning_C (all) MSZoning_FV MSZoning_RH MSZoning_RL MSZoning_RM Street_Grvl Street_Pave Alley_Grvl Alley_Pave LotShape_IR1 ... SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca SaleCondition_Family SaleCondition_Normal SaleCondition_Partial
0 0 0 1 0 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
2 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
3 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
4 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 0 0 0 0 1 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1455 0 0 0 0 1 0 1 1 0 0 ... 0 0 0 1 1 0 0 0 0 0
1456 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 1 0 0 0 0 0
1457 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1458 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0

1459 rows × 234 columns

Join categorical and Continous columns¶

In [351]:
Xnew_ts=X1ts.join(X2ts)
In [352]:
Xnew_ts.shape
Out[352]:
(1459, 268)

Reindexing¶

In [353]:
Xnew_ts.index=range(0,1459)
In [354]:
keep=[]
drop=[]
for i in Xnew_ts.columns:
    if i in Xnew.columns:
        keep.append(i)
    else:
        drop.append(i)
In [355]:
keep
Out[355]:
['MSSubClass',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'TotalBsmtSF',
 '2ndFlrSF',
 'GrLivArea',
 'BsmtHalfBath',
 'FullBath',
 'KitchenAbvGr',
 'Fireplaces',
 'GarageCars',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'MSZoning_C (all)',
 'MSZoning_FV',
 'LandContour_Bnk',
 'LandContour_HLS',
 'LandContour_Low',
 'LandContour_Lvl',
 'Utilities_AllPub',
 'LotConfig_Corner',
 'LotConfig_CulDSac',
 'LotConfig_Inside',
 'Neighborhood_Blmngtn',
 'Neighborhood_CollgCr',
 'Neighborhood_Crawfor',
 'Neighborhood_Edwards',
 'Neighborhood_Gilbert',
 'Neighborhood_MeadowV',
 'Neighborhood_Mitchel',
 'Neighborhood_NAmes',
 'Neighborhood_NPkVill',
 'Neighborhood_NWAmes',
 'Neighborhood_NoRidge',
 'Neighborhood_NridgHt',
 'Neighborhood_OldTown',
 'Neighborhood_Sawyer',
 'Neighborhood_SawyerW',
 'Neighborhood_StoneBr',
 'Neighborhood_Timber',
 'Condition1_Artery',
 'Condition1_RRAe',
 'Condition1_RRAn',
 'BldgType_Duplex',
 'HouseStyle_2Story',
 'HouseStyle_SFoyer',
 'HouseStyle_SLvl',
 'RoofStyle_Hip',
 'RoofStyle_Mansard',
 'RoofMatl_CompShg',
 'Exterior1st_AsbShng',
 'Exterior1st_BrkFace',
 'Exterior1st_CBlock',
 'Exterior1st_CemntBd',
 'Exterior1st_HdBoard',
 'Exterior1st_Wd Sdng',
 'Exterior2nd_BrkFace',
 'Exterior2nd_CBlock',
 'Exterior2nd_CmentBd',
 'Exterior2nd_HdBoard',
 'Exterior2nd_ImStucc',
 'Exterior2nd_MetalSd',
 'Exterior2nd_Stucco',
 'Exterior2nd_VinylSd',
 'Exterior2nd_Wd Sdng',
 'MasVnrType_Stone',
 'ExterQual_Ex',
 'ExterCond_TA',
 'BsmtQual_Ex',
 'BsmtQual_Fa',
 'BsmtQual_Gd',
 'BsmtCond_Gd',
 'BsmtCond_TA',
 'BsmtExposure_Av',
 'BsmtExposure_Gd',
 'BsmtFinType1_ALQ',
 'BsmtFinType1_GLQ',
 'BsmtFinType1_Unf',
 'BsmtFinType2_ALQ',
 'BsmtFinType2_GLQ',
 'BsmtFinType2_Unf',
 'Heating_Wall',
 'CentralAir_N',
 'CentralAir_Y',
 'KitchenQual_Ex',
 'Functional_Min1',
 'Functional_Min2',
 'Functional_Typ',
 'FireplaceQu_Gd',
 'FireplaceQu_Po',
 'FireplaceQu_TA',
 'GarageType_Attchd',
 'GarageType_CarPort',
 'GarageFinish_Fin',
 'GarageFinish_RFn',
 'GarageFinish_Unf',
 'GarageQual_TA',
 'GarageCond_Fa',
 'GarageCond_Gd',
 'Fence_GdPrv',
 'Fence_GdWo',
 'Fence_MnPrv',
 'SaleType_CWD',
 'SaleCondition_Abnorml',
 'SaleCondition_Alloca',
 'SaleCondition_Partial']
In [356]:
drop
Out[356]:
['LotFrontage',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtUnfSF',
 '1stFlrSF',
 'BsmtFullBath',
 'HalfBath',
 'BedroomAbvGr',
 'TotRmsAbvGrd',
 'GarageYrBlt',
 'EnclosedPorch',
 '3SsnPorch',
 'ScreenPorch',
 'PoolArea',
 'MoSold',
 'YrSold',
 'MSZoning_RH',
 'MSZoning_RL',
 'MSZoning_RM',
 'Street_Grvl',
 'Street_Pave',
 'Alley_Grvl',
 'Alley_Pave',
 'LotShape_IR1',
 'LotShape_IR2',
 'LotShape_IR3',
 'LotShape_Reg',
 'LotConfig_FR2',
 'LotConfig_FR3',
 'LandSlope_Gtl',
 'LandSlope_Mod',
 'LandSlope_Sev',
 'Neighborhood_Blueste',
 'Neighborhood_BrDale',
 'Neighborhood_BrkSide',
 'Neighborhood_ClearCr',
 'Neighborhood_IDOTRR',
 'Neighborhood_SWISU',
 'Neighborhood_Somerst',
 'Neighborhood_Veenker',
 'Condition1_Feedr',
 'Condition1_Norm',
 'Condition1_PosA',
 'Condition1_PosN',
 'Condition1_RRNe',
 'Condition1_RRNn',
 'Condition2_Artery',
 'Condition2_Feedr',
 'Condition2_Norm',
 'Condition2_PosA',
 'Condition2_PosN',
 'BldgType_1Fam',
 'BldgType_2fmCon',
 'BldgType_Twnhs',
 'BldgType_TwnhsE',
 'HouseStyle_1.5Fin',
 'HouseStyle_1.5Unf',
 'HouseStyle_1Story',
 'HouseStyle_2.5Unf',
 'RoofStyle_Flat',
 'RoofStyle_Gable',
 'RoofStyle_Gambrel',
 'RoofStyle_Shed',
 'RoofMatl_Tar&Grv',
 'RoofMatl_WdShake',
 'RoofMatl_WdShngl',
 'Exterior1st_AsphShn',
 'Exterior1st_BrkComm',
 'Exterior1st_MetalSd',
 'Exterior1st_Plywood',
 'Exterior1st_Stucco',
 'Exterior1st_VinylSd',
 'Exterior1st_WdShing',
 'Exterior2nd_AsbShng',
 'Exterior2nd_AsphShn',
 'Exterior2nd_Brk Cmn',
 'Exterior2nd_Plywood',
 'Exterior2nd_Stone',
 'Exterior2nd_Wd Shng',
 'MasVnrType_BrkCmn',
 'MasVnrType_BrkFace',
 'MasVnrType_None',
 'ExterQual_Fa',
 'ExterQual_Gd',
 'ExterQual_TA',
 'ExterCond_Ex',
 'ExterCond_Fa',
 'ExterCond_Gd',
 'ExterCond_Po',
 'Foundation_BrkTil',
 'Foundation_CBlock',
 'Foundation_PConc',
 'Foundation_Slab',
 'Foundation_Stone',
 'Foundation_Wood',
 'BsmtQual_TA',
 'BsmtCond_Fa',
 'BsmtCond_Po',
 'BsmtExposure_Mn',
 'BsmtExposure_No',
 'BsmtFinType1_BLQ',
 'BsmtFinType1_LwQ',
 'BsmtFinType1_Rec',
 'BsmtFinType2_BLQ',
 'BsmtFinType2_LwQ',
 'BsmtFinType2_Rec',
 'Heating_GasA',
 'Heating_GasW',
 'Heating_Grav',
 'HeatingQC_Ex',
 'HeatingQC_Fa',
 'HeatingQC_Gd',
 'HeatingQC_Po',
 'HeatingQC_TA',
 'Electrical_FuseA',
 'Electrical_FuseF',
 'Electrical_FuseP',
 'Electrical_SBrkr',
 'KitchenQual_Fa',
 'KitchenQual_Gd',
 'KitchenQual_TA',
 'Functional_Maj1',
 'Functional_Maj2',
 'Functional_Mod',
 'Functional_Sev',
 'FireplaceQu_Ex',
 'FireplaceQu_Fa',
 'GarageType_2Types',
 'GarageType_Basment',
 'GarageType_BuiltIn',
 'GarageType_Detchd',
 'GarageQual_Fa',
 'GarageQual_Gd',
 'GarageQual_Po',
 'GarageCond_Ex',
 'GarageCond_Po',
 'GarageCond_TA',
 'PavedDrive_N',
 'PavedDrive_P',
 'PavedDrive_Y',
 'PoolQC_Ex',
 'PoolQC_Gd',
 'Fence_MnWw',
 'MiscFeature_Gar2',
 'MiscFeature_Othr',
 'MiscFeature_Shed',
 'SaleType_COD',
 'SaleType_Con',
 'SaleType_ConLD',
 'SaleType_ConLI',
 'SaleType_ConLw',
 'SaleType_New',
 'SaleType_Oth',
 'SaleType_WD',
 'SaleCondition_AdjLand',
 'SaleCondition_Family',
 'SaleCondition_Normal']
In [357]:
len(keep)
Out[357]:
111
In [358]:
len(Xnew_ts.columns)
Out[358]:
268
In [359]:
Xnewtest=Xnew_ts[keep]
In [360]:
len(Xnewtest.columns)
Out[360]:
111
In [361]:
Xnewtest
Out[361]:
MSSubClass LotArea OverallQual OverallCond YearBuilt BsmtFinSF1 BsmtFinSF2 TotalBsmtSF 2ndFlrSF GrLivArea ... GarageQual_TA GarageCond_Fa GarageCond_Gd Fence_GdPrv Fence_GdWo Fence_MnPrv SaleType_CWD SaleCondition_Abnorml SaleCondition_Alloca SaleCondition_Partial
0 -0.874711 0.363929 -0.751101 0.400766 -0.340945 0.063295 0.517348 -0.370808 -0.775254 -1.215588 ... 1 0 0 0 0 1 0 0 0 0
1 -0.874711 0.897861 -0.054877 0.400766 -0.439695 1.063392 -0.297903 0.639144 -0.775254 -0.323539 ... 1 0 0 0 0 1 0 0 0 0
2 0.061351 0.809646 -0.751101 -0.497418 0.844059 0.773254 -0.297903 -0.266876 0.891944 0.294508 ... 1 0 0 0 0 1 0 0 0 0
3 0.061351 0.032064 -0.054877 0.400766 0.876976 0.357829 -0.297903 -0.271395 0.837243 0.243004 ... 1 0 0 0 0 1 0 0 0 0
4 1.465443 -0.971808 1.337571 -0.497418 0.679475 -0.387298 -0.297903 0.528434 -0.775254 -0.424487 ... 1 0 0 0 0 1 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 2.401505 -1.591330 -1.447325 1.298950 -0.044694 -0.965376 -0.297903 -1.129968 0.523306 -0.811797 ... 1 0 0 0 0 1 0 0 0 0
1455 2.401505 -1.599808 -1.447325 -0.497418 -0.044694 -0.411477 -0.297903 -1.129968 0.523306 -0.811797 ... 1 0 0 0 0 1 0 1 0 0
1456 -0.874711 2.055150 -0.751101 1.298950 -0.373861 1.724994 -0.297903 0.401907 -0.775254 -0.539856 ... 1 0 0 0 0 1 0 1 0 0
1457 0.646389 0.125527 -0.751101 -0.497418 0.679475 -0.224645 -0.297903 -0.303026 -0.775254 -1.063136 ... 1 0 0 0 0 1 0 0 0 0
1458 0.061351 -0.038790 0.641347 -0.497418 0.712392 0.700719 -0.297903 -0.113237 1.612573 1.058827 ... 1 0 0 0 0 1 0 0 0 0

1459 rows × 111 columns

In [362]:
pred=cvmodel.predict(Xnewtest)
In [371]:
pred
Out[371]:
array([128619.54163418, 181267.41238455, 203420.31328191, ...,
       194632.16524631, 113066.61351441, 220273.64798307])
In [364]:
predn={'Output':list(pred)}
In [365]:
predn
Out[365]:
{'Output': [128619.54163417635,
  181267.41238455163,
  203420.31328190648,
  208505.12629196717,
  193287.82953280647,
  175473.42135002807,
  172241.51479889159,
  162955.82157118394,
  212460.735535882,
  117419.37648884654,
  173083.4575579874,
  86912.57551401446,
  83300.40387287128,
  143236.7747776521,
  93459.2494401347,
  362015.23027741653,
  257273.73001686478,
  307661.700372195,
  316565.6093333476,
  443134.65787934186,
  315380.12608578824,
  205498.82957948718,
  184457.33956293,
  160868.15777053393,
  183287.78756575895,
  193954.03519054374,
  322535.0345734032,
  221391.48048978936,
  203466.23695817083,
  249962.56890286907,
  201776.56812867738,
  98650.73886421438,
  211557.61477046268,
  279620.53376313037,
  284115.9751103569,
  237707.8828542604,
  173157.37485051222,
  163679.27103099757,
  161584.5194995456,
  152259.63479940838,
  169543.26235331167,
  136490.71289648273,
  291476.0268809061,
  245721.67955630447,
  232483.51487052318,
  203459.6887365454,
  263803.7912600847,
  220004.32769759384,
  169967.90569522794,
  151674.9918943154,
  145278.8907203432,
  167333.6076380156,
  157843.7703973431,
  157394.29041673365,
  212915.73032727378,
  170021.72800699316,
  184288.2178273415,
  133017.350211378,
  216270.08916812157,
  130781.93998852264,
  145194.37778841367,
  179904.25513872394,
  115792.13947014339,
  124171.3262741004,
  123000.93222437454,
  90354.99947130015,
  101873.30233350019,
  138255.1436780951,
  160161.9210119685,
  196147.99927628879,
  105645.43591798328,
  83368.09500460947,
  158549.94195091032,
  125353.65292864472,
  156881.2933840779,
  102731.11234874609,
  46812.162150738906,
  157895.0968564257,
  193462.41001726355,
  104749.72323715265,
  138841.93607726504,
  155030.25141426723,
  184557.33119497873,
  88375.99232902998,
  120425.3294386329,
  134426.8498219446,
  141430.07278870634,
  151688.21343676088,
  120180.47105872983,
  148674.4070052856,
  110211.69793278475,
  146634.90997122886,
  141179.2798183791,
  108737.82423087404,
  188073.58699021034,
  68199.25977249467,
  79377.06490122585,
  103269.88061856452,
  68215.41798916271,
  116865.05661466887,
  129844.14304129701,
  119942.02014348311,
  126530.5030561865,
  163938.35576921774,
  154486.62584313756,
  251351.76072241075,
  81482.12689724434,
  238352.98996795065,
  147486.8248120479,
  139652.39547012906,
  108212.35050845421,
  149354.29557758424,
  274160.7538744507,
  132598.83805009708,
  224942.6116872906,
  258463.23294803794,
  184641.84145427353,
  150707.64514230398,
  137877.54687498463,
  197202.32576462685,
  172484.5883846825,
  131559.25796515468,
  287168.5000440791,
  223413.6494416277,
  126589.33991508199,
  52070.88728626759,
  100814.67854133894,
  171835.63071639248,
  100911.78570583947,
  133612.73885403635,
  88832.17322731821,
  109277.52985441478,
  121227.17784286755,
  171211.75894197854,
  116970.50836709345,
  230510.01918226056,
  216338.51964397958,
  232150.79890722816,
  178827.17546011013,
  164053.17471562588,
  51136.448036718735,
  123049.81908471082,
  55104.89073950038,
  279460.14824160095,
  236751.89069170295,
  169119.25751172245,
  187813.04046182078,
  211240.82550163392,
  183541.06010649423,
  154217.28956907112,
  141627.00801649518,
  188577.70570975062,
  172999.55749243338,
  135588.43111118456,
  95607.79045315352,
  62814.28114868947,
  85738.41069458734,
  116880.76908085127,
  153164.71263124276,
  169509.05028018955,
  139009.4504265678,
  146891.06389495684,
  255629.465435273,
  220087.7374826832,
  128690.72179397826,
  177859.4610986802,
  196677.1191978652,
  284900.3735153895,
  177775.78904516535,
  353488.032511316,
  232509.78314136842,
  250197.30815918307,
  173865.0221116657,
  189612.3247740066,
  177320.3269139025,
  145649.4246050287,
  205462.62612162923,
  203981.5741134919,
  193661.00474184265,
  260077.10582479465,
  182363.73926318352,
  258164.10638902456,
  223406.73042868773,
  242198.56136660618,
  203987.91378937333,
  159614.03623229434,
  164035.4895899253,
  126426.24714533452,
  131897.76572541744,
  118170.88922423546,
  117736.8398005633,
  83282.05628980428,
  87664.86835625253,
  139137.15099668736,
  126992.57777656309,
  137903.00926358765,
  145728.69224304473,
  138381.6266988739,
  113871.87499734623,
  152897.42697754235,
  419680.20119212987,
  388960.0759825439,
  374325.92198993586,
  458044.3162161857,
  323227.53547721315,
  333300.6632488226,
  403012.3909114887,
  343814.1743242519,
  315524.33475551003,
  348551.42805390176,
  266979.70741507993,
  393215.08033110143,
  291355.92435891926,
  249419.13378742625,
  199282.7671119187,
  202625.8199396676,
  218615.34407486755,
  452330.57395683345,
  378647.6560687193,
  326767.6120037353,
  257257.02253083442,
  328053.0557235745,
  183506.39716579905,
  169814.27725082386,
  170828.4088531682,
  166034.79268636164,
  156972.00014888385,
  186297.66822351035,
  190601.51812659076,
  193327.28548518475,
  182221.08709545757,
  268067.3755555443,
  160916.79720608296,
  186065.04881562156,
  158656.2679260043,
  273070.1641071062,
  164983.57276284596,
  332295.7996589275,
  300981.9002827132,
  260158.12190448292,
  274038.55353309016,
  238328.00760495386,
  225997.95020396053,
  267817.6218277288,
  249436.9262987965,
  411294.64584781625,
  229320.67959538242,
  204780.51366573788,
  257019.31419110723,
  224990.7109449404,
  278629.30720957764,
  250342.55664141837,
  282860.2977423025,
  231337.1448480937,
  214487.75493300462,
  183418.4427488816,
  173453.4365672351,
  138781.19900487043,
  223043.45995872744,
  240489.8879431992,
  161466.61287291133,
  122152.98689316006,
  162654.41198556882,
  206284.74597470497,
  247916.4619258853,
  185089.86379234266,
  134594.944012237,
  172819.40417551133,
  174810.2163939708,
  175637.41994409123,
  120383.86093112042,
  150316.8856829818,
  111287.15749159014,
  119656.25524338023,
  105841.88269064976,
  112804.58888209087,
  292375.4744044506,
  258935.13879353483,
  254178.8661206039,
  206494.24250239175,
  182140.16478634824,
  169580.86499773315,
  170786.23133783767,
  329022.8523552219,
  222234.43244786834,
  195010.6015215246,
  238524.72990757268,
  237737.11604299102,
  153857.80487626657,
  135888.93775763552,
  242915.5337418698,
  106232.20578173295,
  157903.66220058288,
  201838.1983457667,
  179130.72586086148,
  136490.72033411168,
  121280.53433634565,
  149761.59858383454,
  168573.68854867184,
  164043.36494944184,
  154259.66841634116,
  197869.78952803207,
  194183.78166712236,
  112538.09655279062,
  185717.22243808483,
  193739.15832993633,
  227118.99183960754,
  149310.12774235438,
  174044.48659071725,
  156863.11764992925,
  128486.77000650452,
  150760.98118819034,
  132346.70819971367,
  146422.6914801855,
  147201.99883197545,
  130406.4178142635,
  102211.88006394525,
  146267.7567281954,
  119552.27793918586,
  166491.37865569332,
  130600.33065574756,
  77577.64971555775,
  143486.3841462509,
  87648.60956278263,
  137371.84499122578,
  164418.2586221785,
  149844.74947114437,
  34019.33974228744,
  103947.23541924162,
  53287.03813869228,
  216440.61915060287,
  156993.1038575158,
  128139.29083812867,
  158642.136886008,
  143579.0632480821,
  141195.68923931775,
  122248.95928696071,
  123679.49900089442,
  106920.8371881828,
  124355.25608811574,
  138487.5240692736,
  140752.39834436926,
  157811.99411505996,
  131732.8009121948,
  135276.47907589813,
  123149.90596701154,
  150810.52861432347,
  123345.50251675304,
  118240.48185997871,
  141910.1308653501,
  84468.07824395079,
  114107.95107663448,
  123534.79920800467,
  92484.29603716082,
  35169.619400984986,
  102269.58962253087,
  111701.36821643542,
  173518.48759940328,
  136142.59065359624,
  24157.130033301888,
  117849.58064085303,
  149203.4139550824,
  29178.75487903082,
  143600.73319970554,
  130588.6233035777,
  97380.73276868415,
  99845.47183938444,
  141226.25925727873,
  129405.20419049877,
  149367.5876736489,
  155552.8474812678,
  75960.8619343447,
  147336.12247002154,
  112873.74479034185,
  107139.8954055376,
  144574.94357481404,
  57840.876484943175,
  104028.20375822659,
  99306.91424731584,
  132025.14484165862,
  147288.1133253581,
  91885.48365128759,
  147875.802046834,
  152772.66663399315,
  162529.3414488091,
  153917.5041870882,
  178836.31862678847,
  25989.285006882958,
  119796.0770651536,
  119927.44194330792,
  162426.09140559906,
  127378.83832410388,
  113718.75930441405,
  169014.763695116,
  174238.8161381276,
  226426.87658928602,
  154275.9233190209,
  158547.93650968684,
  95983.3504785064,
  141641.608764336,
  96988.09928061126,
  322026.33380757587,
  313940.3382686051,
  314016.7381243104,
  345282.62484694796,
  338611.41443840176,
  217590.21598264732,
  297890.31722380087,
  204451.01188941952,
  221901.92270848752,
  266010.05145619076,
  175469.32804302272,
  264110.64079206693,
  140972.19365717104,
  205870.6894498053,
  210026.16837864742,
  218371.51471394952,
  206110.14859798897,
  159558.0871915376,
  132968.58388197728,
  253126.38751863665,
  248856.10020707987,
  189858.24992911742,
  210834.9916687439,
  250033.029085887,
  293174.1837162627,
  211682.513205599,
  251560.55292357795,
  173398.7426643374,
  128058.59879175501,
  133279.49094393518,
  108227.18834545341,
  142525.59904521468,
  132773.39285165403,
  139193.3142763981,
  132890.13193127676,
  117768.11030693068,
  122873.87620157842,
  164820.6993445139,
  150131.76209305244,
  194784.10798249958,
  191944.22738656015,
  235323.57251029936,
  166965.14037005816,
  216779.349981811,
  174905.8582729673,
  244794.91776452854,
  116085.69278705944,
  125232.1733653733,
  123530.21355482974,
  239735.83048340006,
  334003.6063616342,
  184120.00334727205,
  62248.469765472204,
  335611.63285304426,
  64978.86050818854,
  245919.11138372854,
  138664.7467910016,
  173528.57472020548,
  162410.69301387342,
  386048.0163351034,
  338491.93351079576,
  235956.87831387596,
  228045.3144996917,
  202738.0349215535,
  381107.678277927,
  138691.07782954784,
  168741.29442780375,
  133458.47931905574,
  116472.30316772507,
  133306.59660366803,
  139001.15625670445,
  194859.50399222673,
  183235.62299993672,
  175429.93552398332,
  211938.09429543163,
  186378.72614084586,
  177632.5485002152,
  250087.42644148896,
  188193.7223491299,
  180776.33508040832,
  178019.1592126292,
  225115.1600155022,
  388927.9706977082,
  402025.88036517473,
  180605.04388539997,
  344177.74155986146,
  251078.9351806614,
  249260.69049376465,
  189915.7335334244,
  259226.92335503915,
  220456.2926463175,
  170189.01993691904,
  182273.6937275533,
  131705.87909349773,
  294771.792602289,
  158773.48577693244,
  272827.28058255906,
  147329.4879050749,
  100330.0166302695,
  123844.99452278021,
  87602.0735175309,
  102945.80058652573,
  99289.22763186783,
  132162.49255380448,
  130601.02948291013,
  308307.09803415055,
  407875.2163888662,
  379942.66686591064,
  406033.07677980186,
  435110.20750099246,
  371113.66078709124,
  285527.0315745531,
  346990.54248735274,
  445351.9888733671,
  266438.09431402606,
  348471.9903355726,
  351458.52111121954,
  306017.6833458324,
  205929.5763815571,
  335718.4262758917,
  209962.76198077388,
  198291.52572507318,
  174015.74837911,
  219704.37185659894,
  212529.84198832867,
  200100.58211169517,
  169307.43171977552,
  190551.02199003223,
  207656.26694761612,
  230834.24741797004,
  220092.12796749035,
  161862.76364794822,
  245766.98016176105,
  185075.87946814863,
  239992.03881152885,
  309443.4864279112,
  302208.0741662002,
  291919.80509931594,
  303097.499772851,
  274889.1499123482,
  254398.98633814737,
  246933.84425476604,
  274831.3833191863,
  241663.40726171125,
  228717.7354447676,
  239975.61733118526,
  233176.05417436076,
  207692.53887622734,
  197896.45769113608,
  143460.61667708142,
  167940.14631263554,
  188708.77827277518,
  194505.63476557165,
  225160.4971553901,
  198786.8340971219,
  201420.48269142833,
  113469.16992585725,
  136661.57212278212,
  61189.21169455445,
  93487.98672383177,
  207515.25296070008,
  159902.917487714,
  273708.13274843316,
  322586.972298646,
  166555.34525446853,
  154898.46357972422,
  149513.08888226585,
  176376.1518861623,
  254928.37145430656,
  227896.67233849713,
  251563.67707542697,
  245443.8992659906,
  166408.77191242692,
  233918.68482579262,
  193808.5654934799,
  198199.16254202387,
  301034.5023979384,
  214401.2873264496,
  324087.55757506867,
  301275.4444862843,
  214552.75369448224,
  183664.62804140613,
  177892.7952541086,
  215709.86408888898,
  142134.07019394962,
  158784.0169270339,
  135707.12558452316,
  149865.93246221985,
  179563.1614313737,
  92969.1064445287,
  111428.23078014061,
  146703.87888479664,
  76507.85731249789,
  162705.23574766333,
  131781.66866459174,
  107945.93832682495,
  217417.62059672526,
  132257.77292668677,
  170341.42473113988,
  198902.54988606897,
  133556.75117973142,
  116353.14547452329,
  143919.6634576852,
  131504.04320362952,
  178670.57689850274,
  122583.29951215166,
  158884.11978355795,
  93670.68224975886,
  108856.79946554037,
  82360.18258696148,
  145098.22949056604,
  136409.70557252158,
  180918.43910766047,
  182045.39891813495,
  126985.5026006962,
  162914.42477932238,
  134982.07349256316,
  137565.82288815588,
  119560.11217191447,
  127383.34079224296,
  141341.87680346455,
  155977.16684320575,
  108406.42617256037,
  114954.60420593186,
  126886.7782077304,
  119884.35905278388,
  98566.57515327036,
  73971.2535428722,
  112575.3381037023,
  90285.97455480846,
  124222.24040423802,
  130564.42491880953,
  104470.4382754828,
  139518.7563506431,
  82413.50549180733,
  90897.79446964027,
  172168.6610810706,
  31130.636891197326,
  69687.10687275512,
  105891.21273325573,
  120795.49571392924,
  87662.14785325763,
  157641.39016843506,
  131223.97664220197,
  42456.27121549925,
  200361.93569875247,
  121862.0068732385,
  114042.08839147842,
  126961.51913614321,
  141496.01829580145,
  148328.95726053143,
  122222.13711650841,
  114968.4099804684,
  167341.26370384262,
  108059.10526994645,
  152197.8486420315,
  130287.24661621814,
  106615.68051987568,
  113820.55528213378,
  109160.92488873807,
  118936.63130535289,
  73756.30681455025,
  176526.79336712413,
  140681.0977793789,
  147038.05634572182,
  171530.82361853076,
  131679.54010120546,
  94556.11765701493,
  160877.77683364513,
  160466.61070283077,
  106305.17283241215,
  129724.87569730084,
  131348.8347855821,
  111107.36634598744,
  40130.33481951714,
  113578.7147627059,
  136659.16826898122,
  147692.51266568783,
  135893.2686606431,
  173904.11389149915,
  128795.52460446139,
  146745.45906618115,
  101931.99553987886,
  144772.67556704505,
  201495.08376360196,
  127560.40967698125,
  131820.79997462896,
  146288.01139538133,
  254830.65630518371,
  122847.29035715957,
  200924.84493024633,
  172677.00809002103,
  102495.23482367981,
  154930.20873199304,
  259891.97882392904,
  232299.76701607287,
  251416.1328477153,
  208684.50555646227,
  189843.65804594412,
  229408.56542634056,
  358515.3760271539,
  347017.4145720037,
  239173.26381754197,
  211677.31762574048,
  159919.21733216514,
  234124.4147958851,
  214378.98436044104,
  201939.62832987707,
  226508.01905510027,
  157129.24763779656,
  140525.25099918986,
  178635.2525477113,
  226318.99906529943,
  270783.47652106953,
  312572.0366401439,
  243405.7924238756,
  213338.89569825615,
  124585.64136365682,
  233058.73332046653,
  198746.94358039825,
  226560.5573000217,
  194342.3141809795,
  122948.48085524449,
  131320.09796043186,
  160815.73639056072,
  156555.53002074687,
  162672.39886490387,
  375411.4969561524,
  70928.00470279244,
  64772.52181335845,
  60948.73599530561,
  126762.3598139446,
  93426.95772781494,
  113121.98575051935,
  106766.91445468774,
  111547.96562183488,
  156149.7862562855,
  186715.67811361508,
  150843.384440182,
  159755.16528835805,
  204996.73514046453,
  162540.8668009098,
  194047.060313311,
  146085.24838818767,
  151825.20721949558,
  220531.3631625793,
  259665.9475385384,
  285986.560117865,
  132896.58650886835,
  120044.68651752,
  131698.3592291716,
  110130.7688730679,
  132588.06130518793,
  104657.34454906746,
  173128.24009170136,
  77309.84502806635,
  68629.20947054058,
  56419.19746716223,
  49704.085073517315,
  336265.2706663419,
  302300.75224868045,
  292352.3269150258,
  210915.74517444128,
  133492.01584359235,
  200323.72100351183,
  184275.052691116,
  281307.7907602078,
  260673.35135512133,
  150432.72557673792,
  224543.5440279602,
  187040.65062796694,
  201320.01737223082,
  249887.20751797588,
  237226.3237115249,
  259459.99625907713,
  302922.78170061857,
  185400.4750297852,
  112064.59497064233,
  158962.14381758997,
  149089.2733528349,
  130966.81233318643,
  133036.37370755695,
  102407.59434725433,
  119272.03625520163,
  149990.26286600233,
  121324.78669192991,
  128862.92889859491,
  116234.70401446173,
  131875.89654952625,
  177293.07246183394,
  198449.1192338573,
  155000.2308029216,
  174831.31794534894,
  193880.0045913673,
  167977.27080307202,
  218912.94436963554,
  153785.400587891,
  178293.20839680638,
  158354.80221531208,
  213446.31691004167,
  241509.72755168233,
  381709.5791099947,
  488612.5461315876,
  196293.94987779635,
  319174.2319094921,
  380847.24277045194,
  424457.97986405063,
  154889.68221707462,
  194184.63620584935,
  225634.30604758515,
  198050.4339120744,
  161958.63500525866,
  191506.69731772316,
  170294.54434295304,
  209297.1627328276,
  193864.57573966013,
  159581.12769456164,
  139313.10897406953,
  126292.09606777696,
  145084.98914538164,
  173158.2331322017,
  103926.03235701073,
  118118.22475525433,
  144550.62880786767,
  121695.1463207607,
  372377.6786564333,
  294488.2122048285,
  347823.27546554,
  437438.0601244591,
  345954.0984406314,
  398377.6035266182,
  445731.03369703237,
  385446.80444999755,
  435927.3772744221,
  295851.07901514426,
  371546.7568160014,
  353599.6915863116,
  349195.44655897835,
  335301.2529948537,
  331789.8540438929,
  257039.27579359998,
  245910.382444328,
  257076.18947664372,
  200739.3766332998,
  190691.13210324885,
  196559.99165674133,
  233070.78369996487,
  292551.50153842685,
  208599.33278463938,
  201876.94013330532,
  192411.59469157428,
  168565.65695351054,
  194044.77949963955,
  184099.41418459476,
  202021.915566706,
  199149.498539752,
  188427.5381734065,
  187397.7783176303,
  183805.53769286769,
  233559.7652366041,
  194614.62685041828,
  196604.46568407258,
  177255.1817469581,
  212590.95908034313,
  168580.18033981067,
  201734.8531993755,
  229114.26094394483,
  198300.24287860395,
  189088.70485906256,
  324724.1210344498,
  376737.7947853871,
  313319.5154516617,
  261871.37056274098,
  284412.04841857066,
  306187.9629509704,
  210093.24522748866,
  267099.8983230137,
  220618.29508434574,
  377714.6664988933,
  214243.4480808713,
  228939.9893089747,
  224328.57407875467,
  223710.45449361057,
  223189.32869000005,
  223304.6332364924,
  208587.49959683645,
  251682.33346060483,
  200120.12264280915,
  319319.2288545869,
  278119.4071042206,
  242489.97198969783,
  258539.21531937778,
  150440.19860834675,
  144583.62720763969,
  156107.070757178,
  186487.215963631,
  203003.04489476088,
  126762.20955490823,
  107493.24070438655,
  146677.137264568,
  276249.6022638167,
  150413.5861685898,
  171111.95131739188,
  200033.85062913864,
  181598.00419854297,
  209924.63369212567,
  222615.9995366694,
  197896.55425482555,
  171064.96351686222,
  169827.71860964477,
  202465.9755097495,
  289671.07463102206,
  301883.3609263044,
  212176.85695324623,
  291074.4591941243,
  347434.4354102528,
  157686.1761761543,
  234777.8763897896,
  136452.6401381728,
  161467.73353373425,
  195955.42496246597,
  197174.88930296068,
  244507.03964543407,
  164256.24951744507,
  126555.66741580388,
  134718.66319660176,
  104997.15483180751,
  112045.1907411151,
  170865.6622449701,
  147383.79877245988,
  124020.06226216687,
  177539.87180244946,
  151354.93565685695,
  211970.05973688496,
  146482.6057162064,
  230404.69114908646,
  116428.81205621507,
  41958.45529382586,
  52182.14602956311,
  118985.88949847104,
  117838.9677297641,
  151191.8462024478,
  155788.92554788012,
  147929.12078612452,
  138841.9527853175,
  134479.1737191509,
  151656.448075258,
  120672.47122831421,
  188611.18516335014,
  111296.70952224464,
  170534.06511737744,
  132599.22814681334,
  167875.67139676126,
  140134.67879721677,
  137537.81662152894,
  138675.95623851777,
  129640.51136516451,
  126239.21594101338,
  122002.49106202208,
  142283.35953401844,
  119186.07245047089,
  112316.57948145832,
  160009.41858635691,
  240660.01761122164,
  128496.99834162091,
  119119.29291477059,
  182768.554756602,
  106635.77627423295,
  133743.1111455553,
  100703.78025293013,
  147891.08312291978,
  144852.1778446886,
  143268.07847225864,
  164739.1364539423,
  117631.14974755234,
  106081.42165993301,
  119646.58843606038,
  91780.83056136162,
  129138.63602255729,
  95012.56431707335,
  88090.80238405739,
  134022.63847346834,
  119765.24506587154,
  81067.07143234494,
  139345.006792638,
  185021.2420155868,
  138517.88247785234,
  106677.65131889516,
  159120.42479664803,
  121281.16210358296,
  200673.35257717862,
  97428.34977336612,
  115280.38024065825,
  95148.67108105743,
  167477.63171060663,
  144219.61413544358,
  128423.84499309101,
  111181.40049703034,
  144490.08238271446,
  ...]}
In [366]:
ID=df2['Id']
In [367]:
ID
Out[367]:
0       1461
1       1462
2       1463
3       1464
4       1465
        ... 
1454    2915
1455    2916
1456    2917
1457    2918
1458    2919
Name: Id, Length: 1459, dtype: int64
In [368]:
df_final1=pd.DataFrame(ID)
In [369]:
df_final1
Out[369]:
Id
0 1461
1 1462
2 1463
3 1464
4 1465
... ...
1454 2915
1455 2916
1456 2917
1457 2918
1458 2919

1459 rows × 1 columns

In [372]:
df_final2=pd.DataFrame(predn)
In [373]:
df_final2
Out[373]:
Output
0 128619.541634
1 181267.412385
2 203420.313282
3 208505.126292
4 193287.829533
... ...
1454 80304.088670
1455 79292.599894
1456 194632.165246
1457 113066.613514
1458 220273.647983

1459 rows × 1 columns

Final Output¶

In [374]:
df_final=df_final1.join(df_final2)
In [375]:
df_final
Out[375]:
Id Output
0 1461 128619.541634
1 1462 181267.412385
2 1463 203420.313282
3 1464 208505.126292
4 1465 193287.829533
... ... ...
1454 2915 80304.088670
1455 2916 79292.599894
1456 2917 194632.165246
1457 2918 113066.613514
1458 2919 220273.647983

1459 rows × 2 columns

In [ ]: